
    Vhh                      U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlZd dlZd dlZd dlmZ d dlm Z! d d	l"m#Z# d d
l$m%Z% d dl&m'Z( d dl)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/ ddl0m1Z1m2Z2 ddl3m4Z4 ddl5m6Z6m7Z7 ddl8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZC ddlDmEZEmFZFmGZGmHZHmIZImJZJ erWd dlKmLZLmMZMmNZN ddlOmPZPmQZQmRZRmSZS ddlTmUZU ddlVmWZWmXZXmYZY ddlZm[Z[  ed      Z\eeeX   geWf   Z]e^e[   Z_ee`ej                  f   Zbe`Zcej                  j                  efd      Zg ej                  ef      ZiddZj G d dej                        Zk e=d        G d! d"             Zlej                   G d# d$             Znej                   G d% d&             Zoej                   G d' d(             Zpej                   G d) d*             Zqej                   G d+ d,             Zreeleneoeqepf   Zsi Ztd-eud.<    G d/ d0      Zvi Zwd1eud2<   	 d	 	 	 	 	 	 	 	 	 dd3Zx G d4 d5e      Zy	 	 	 	 dd6Zz	 	 	 	 	 	 dd7Z{dd8Z|	 d	 	 	 	 	 dd9Z} ej                  d      dd:       Z	 	 	 	 	 	 	 	 dd;Z	 	 	 	 	 	 d d<Zdd=Zej                  ej                  ej
                  ej                  iej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                   fD  ci c]  } | |  c} Zd>eud?<   	 	 	 	 	 	 	 	 dd@Z G dA dB      Z G dC dDe(      Z' G dE dF      Z e
j*                  dGe
j,                  H      ZddIZ G dJ dKe6eeFe         Zej                   G dL dM             Z edi dN eej8                  dO dPQ      dR eej8                  dS dT dUV      dW eej8                  dX dY dZV      d[ eej8                  d\ d] d^V      d_ eej8                  d` da dbV      dc eej8                  dd de dcf      dg eej8                  dh di djV      dk eej8                  dl dm dn dko      dp eej8                  dq dpQ      dr eej8                  ds drQ      dt eej8                  du dvQ      dw eej8                  dx dyQ      dz eej8                  d{ d| d} dz~      d eej8                  d d df      d eej8                  d d dV      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d d dV      d eej8                  d d dV      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dQ      d eej8                  d dìQ      d eej8                  dń dƬQ      d eej8                  dȄ dɬQ      d eej8                  d˄ d̬Q      d eej8                  d΄ dϬQ      d eej8                  dф dҬQ      Zdeud<   ddՄZ G dք de:      Z G d؄ de<      Z G dڄ de      Zej                   G d܄ dݫ             Z G dބ d߫      Z e       Z G d d      Z G d d      Z ede`      Z edee      ZereejT                  eHeeeedf   f   f   Z G d deeef         Z G d d      Z G d deee         Zej                   G d d             Z ej                  d      dd       Z G d d      Z G d de7      Zyc c} w (      )annotationsN)autoEnum)chain)	AnyCallablecastClassVarGeneric
NamedTupleOptionalTYPE_CHECKINGUnion)TypeVar)ELEMENTWISE_TYPE_PROMOTION_KIND)_pytree)
OrderedSet)int_oo)PythonPrinter)free_symbol_is_typesymbol_is_typeSymT)bound_sympyValueRanges   )configmetrics)DtypePropagationOpsHandler)BasicMathOpsMixinDefaultHandler)boolean_opsDeferredLineBasegenerate_assertIndentedBufferir_dataclass
ScopedDict	sympy_dotsympy_index_symbol
sympy_substriton_typeunique)ops
OpsHandlerOpsValueReductionType	StoreModeV)IteratorMutableMappingSequence)BufferChoiceCallerFixedLayoutIRNodeLoopBody)BaseScheduling	SchedulerSchedulerNode   PythonWrapperCodegen_Tschedulec                x    t         j                  t        j                        rt         j	                  d|        y y )NzData type propagation: %s)schedule_logisEnabledForloggingDEBUGdebug)msgs    N/home/dcms/DCMS/lib/python3.12/site-packages/torch/_inductor/codegen/common.pydata_type_loggerrK   P   s*      /6< 0    c                  <    e Zd ZdZdZdZedd       Zedd       Zy)	WorkspaceZeroModer   r>   r   c                    | |k(  s|t         j                  k(  r| S | t         j                  k(  r|S t        d| d|d      )NzWorkspaceZeroMode.combine(, ))rN   UNINITIALIZEDNotImplementedErrorabs     rJ   combinezWorkspaceZeroMode.combineZ   sK    6Q+999H!///H!$>qe2aU!"LMMrL   c                F    | rt         j                  S t         j                  S N)rN   ZERO_ON_CALLrR   )	zero_fills    rJ   	from_boolzWorkspaceZeroMode.from_boolb   s    $111 ...rL   N)rU   rN   rV   rN   returnrN   )r[   boolr]   rN   )	__name__
__module____qualname__rR   rZ   ZERO_PER_GRAPHstaticmethodrW   r\    rL   rJ   rN   rN   U   s9    MLNN N / /rL   rN   T)frozenc                     e Zd ZU dZded<   ded<   ded<   ded	<   d
Zded<   ej                  Zded<   e	ddd       Z
e	dd       Ze	dd       Ze	dd       ZddZeZd dZd!dZed!d       ZeZeZeZd"dZd"dZd#dZd$dZy)%WorkspaceArga2  A temporary buffer used for a single kernel, then discarded.

    Not registered as a traditional buffer since there are no users,
    so it would be dead code eliminated.

    Args:
        nbytes: The size of the buffer in bytes.
        zero_fill: Whether the buffer should be initialized to zero.

    
sympy.ExprcountrN   	zero_modetorch.devicedevicestr
outer_namews_ptr
inner_nametorch.dtypedtypec                P    |  t        t        j                  j                         S rY   )nextr1   graphworkspace_id)prefixs    rJ   unique_namezWorkspaceArg.unique_name}   s!    $qww334566rL   c                    | j                   |j                   k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S rY   )rp   rr   rl   rT   s     rJ   can_joinzWorkspaceArg.can_join   s@     LLALL(XQWW-?XAHHPQPXPXDX	
rL   c                    t        | j                  |j                  z   t        j                  | j                  |j                        | j
                  | j                  | j                  | j                        S N)ri   rj   rr   rl   rp   rn   )	rg   ri   rN   rW   rj   rr   rl   rp   rn   rT   s     rJ   joinzWorkspaceArg.join   sS    ''AGG#'//Q[[I''88||||
 	
rL   c                   | j                   |j                   k(  r2| j                  |j                  k(  r| j                  |j                  k(  sJ t        t	        j
                  | j                  |j                        t        j                  | j                  |j                        | j                   | j                  | j                  | j                        S r|   )rr   rl   rp   rg   sympyMaxri   rN   rW   rj   rn   rT   s     rJ   maximumzWorkspaceArg.maximum   s     GGqww188qxx#7ALLALL<X	
X))AGGQWW-'//Q[[I''88||||
 	
rL   c                    | j                   S rY   rl   selfs    rJ   
get_devicezWorkspaceArg.get_device   s    {{rL   c                    | j                   S rY   rr   r   s    rJ   	get_dtypezWorkspaceArg.get_dtype   s    zzrL   c                f    ddl m}  || j                  | j                  | j                  gdg      S )Nr   )r7   r>   )rl   rr   sizestride)irr7   rl   rr   ri   )r   r7   s     rJ   
get_layoutzWorkspaceArg.get_layout   s.    $;;****3	
 	
rL   c                "    | j                         S rY   )r   r   s    rJ   layoutzWorkspaceArg.layout   s      rL   c                    | j                   gS rY   )ri   r   s    rJ   get_sizezWorkspaceArg.get_size   s    

|rL   c                8    t         j                  j                  gS rY   )r   SOner   s    rJ   
get_stridezWorkspaceArg.get_stride   s    }rL   c                    | j                   S rY   )rn   r   s    rJ   get_namezWorkspaceArg.get_name   s    rL   c                    g S rY   rd   r   s    rJ   get_inputs_that_alias_outputz)WorkspaceArg.get_inputs_that_alias_output   s    	rL   N)
workspace_)rw   rm   r]   rm   )rU   rg   rV   rg   r]   r^   )rU   rg   rV   rg   r]   rg   )r]   rk   )r]   rq   )r]   r7   )r]   list[sympy.Expr]r]   rm   )r]   	list[str])r_   r`   ra   __doc____annotations__rp   torchuint8rr   rc   rx   rz   r}   r   r   get_device_or_errorr   r   propertyr   get_output_specmaybe_get_output_specmaybe_get_layoutr   r   r   r   rd   rL   rJ   rg   rg   i   s    	   OJE;$7 7 
 

 
 
 
 
 %
 ! ! !O&!rL   rg   c                  p    e Zd ZU ded<   ded<   ded<   ej
                  j                  Zded<   dZd	ed
<   y)	TensorArgrm   namebufferrq   rr   rh   offsetNOptional[str]alias_of)	r_   r`   ra   r   r   r   Zeror   r   rd   rL   rJ   r   r      s.    
IKFJ%"Hm"rL   r   c                  4    e Zd ZU ded<   ded<   edd       Zy)SizeArgrm   r   rh   exprc                     y rY   rd   r   s    rJ   r   zSizeArg.alias_of   s    rL   Nr]   r   )r_   r`   ra   r   r   r   rd   rL   rJ   r   r      s    
I
 rL   r   c                      e Zd ZU ded<   y)ConstexprArgrm   r   Nr_   r`   ra   r   rd   rL   rJ   r   r          
IrL   r   c                      e Zd ZU ded<   y)TMADescriptorArgrm   r   Nr   rd   rL   rJ   r   r      r   rL   r   c                  0    e Zd ZU ded<   ded<   dZded<   y)DeviceCodegenSchedulingConstructor
schedulingWrapperConstructorwrapper_codegenNOptional[WrapperConstructor]cpp_wrapper_codegen)r_   r`   ra   r   r   rd   rL   rJ   r   r      s    %%''8<5<rL   r   zdict[str, DeviceCodegen]device_codegensc                      e Zd ZddZddZddZddZddZddZddZ	ddZ
dd	Zdd
ZddZddZddZddZddZddZddZy)DeviceOpOverridesc                    t         rY   rS   r   r   s     rJ   import_get_raw_stream_asz*DeviceOpOverrides.import_get_raw_stream_as       !!rL   c                    t         rY   r   r   
device_idxs     rJ   
set_devicezDeviceOpOverrides.set_device   r   rL   c                    t         rY   r   r   s    rJ   synchronizezDeviceOpOverrides.synchronize   r   rL   c                    t         rY   r   r   s     rJ   device_guardzDeviceOpOverrides.device_guard   r   rL   c                    t         rY   r   r   s    rJ   cpp_device_guardz"DeviceOpOverrides.cpp_device_guard   r   rL   c                    t         rY   r   r   s    rJ   cpp_aoti_device_guardz'DeviceOpOverrides.cpp_aoti_device_guard  r   rL   c                    t         rY   r   r   s    rJ   cpp_stream_guardz"DeviceOpOverrides.cpp_stream_guard  r   rL   c                    t         rY   r   r   s    rJ   cpp_aoti_stream_guardz'DeviceOpOverrides.cpp_aoti_stream_guard  r   rL   c                    t         rY   r   r   s    rJ   cpp_getStreamFromExternalz+DeviceOpOverrides.cpp_getStreamFromExternal
  r   rL   c                    t         rY   r   r   s    rJ   kernel_headerzDeviceOpOverrides.kernel_header  r   rL   c                    t         rY   r   r   s    rJ   kernel_driverzDeviceOpOverrides.kernel_driver  r   rL   c                    t         rY   r   r   s    rJ   cpp_stream_typez!DeviceOpOverrides.cpp_stream_type  r   rL   c                    t         rY   r   r   s    rJ   aoti_get_streamz!DeviceOpOverrides.aoti_get_stream  r   rL   c                    t         rY   r   r   s    rJ   cpp_kernel_typez!DeviceOpOverrides.cpp_kernel_type  r   rL   c                    t         rY   r   r   s    rJ   cpp_device_ptrz DeviceOpOverrides.cpp_device_ptr  r   rL   c                    t         rY   r   r   s    rJ   tma_descriptor_helpersz(DeviceOpOverrides.tma_descriptor_helpers  r   rL   c                    t         rY   r   )r   idxs     rJ   cpp_global_scratchz$DeviceOpOverrides.cpp_global_scratch"      !!rL   Nr   rm   r]   rm   )r   intr]   rm   r   )r   r   r]   zOptional[tuple[str, str]])r_   r`   ra   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rd   rL   rJ   r   r      sW    """""""""""""""""rL   r   zdict[str, DeviceOpOverrides]device_op_overrides_dictc                ,    t        |||      t        | <   y rY   )r   r   )rl   device_schedulingdevice_wrapper_codegendevice_cpp_wrapper_codegens       rJ   register_backend_for_devicer   ?  s     ,13MOFrL   c                      e Zd Z e       Z e       Z e       Z e       Z e       Z e       Z	 e       Z
 e       Z e       Z e       Zy)BackendFeatureN)r_   r`   ra   r   FOREACH	BUCKETIZEINPLACE_BUFFERSMASKED_SCATTER_WITH_INDEXSCANSORTTUPLE_REDUCTIONPREFER_STORE_LOOP_ORDERTRITON_TEMPLATESREDUCE_TO_SINGLE_ELEMENTrd   rL   rJ   r   r   J  sL    fGIfO $6D6DfO"fv#vrL   r   c                   | 
t               S t                t        | t        j                        r| j
                  }n)t        | t              sJ | }t        j                  |      } t        |      }|sJ  |d       }|j                  |       S rY   )	r   init_backend_registration
isinstancer   rl   typerm   get_scheduling_for_deviceget_backend_features)rl   device_typescheduling_ctorr   s       rJ   r  r  W  s~     ~|&%,,'kk&#&&&k*/<O? &J**622rL   c                @    t        |t              sJ |t        |       v S )zSee also V.graph.has_feature)r  r   r  )rl   features     rJ   has_backend_featurer  i  s%     g~...*6222rL   c                <    | t         v rt         |    j                  S d S rY   )r   r   r   s    rJ   r  r  q  s     17?1J?6"--TPTTrL   c                Z    | t         v r#t         |    }|r|j                  S |j                  S y rY   )r   r   r   )rl   cpp_wrapperwrapper_codegen_objs      rJ   get_wrapper_codegen_for_devicer  u  sA      -<V-D   33	
 %44	

 rL   c                    ddl m}  ddlm} ddlm} ddlm} ddlm	} ddl
m} ddlm} dd	lm} dd
lm} t%        d      3| ||dt'        dfd|t(        j*                  j,                  r|n|       t%        d      ||dt'        dfd||       t%        d      t'        d|||       t%        d      t'        d|||       t.        j0                  j3                         }	|	dk7  rCt%        |	      7ddlm}
 	  |
d      } |
d      } |
d      }|r|r|rt'        |	|||       y y y y y y # t8        $ r Y y w xY w)Nr>   )CppScheduling)CppWrapperCpu)CppWrapperCpuArrayRef)CppWrapperGpu)CUDACombinedScheduling)HalideScheduling)MetalScheduling)TritonSchedulingr?   cpu)cpphalidetritonc                6     t         j                     |       S rY   )r   cpu_backend)r   cpu_backendss    rJ   <lambda>z+init_backend_registration.<locals>.<lambda>  s    ?|F,>,>?
K rL   cuda)r  r  c                6     t         j                     |       S rY   )r   cuda_backend)r   cuda_backendss    rJ   r#  z+init_backend_registration.<locals>.<lambda>  s    A}V-@-@A*M rL   xpumpsprivateuseoner   )_get_custom_mod_func
Schedulingr@   CppWrapperCodegen)r  r  cpp_wrapper_cpur  cpp_wrapper_cpu_array_refr  cpp_wrapper_gpur  cuda_combined_schedulingr  r  r  r)  r  r  r  wrapperr@   r  r   r   aot_inductorallow_stack_allocationr   _C_get_privateuse1_backend_name torch.utils.backend_registrationr+  RuntimeError)r  r  r  r  r  r  r  r  r@   private_backendr+  r   r   r   r"  r'  s                 @@rJ   r  r    sm   ".@.@($(- '/ &&

 	$K ""99 "	
 !(0 -&
 	$M 		
 !'/# 		
 !'/# 		
 hh<<>O?*%o6>I	 4\ B23IJO"67J"K _9L+#%#'	 :M_  ? 	+   		s   ?,D1 1	D=<D=c                L    ddl m} g | t        ||j                  |            S )Nr   )FlexibleLayout)r   r;  r'   contiguous_strides)index
index_varssizesr;  s       rJ   index_prevent_reorderingr@    s,    
 $ UUTIj.*K*KE*RSTTrL   c                    |t         | <   y rY   )r   )rl   device_op_overridess     rJ   register_device_op_overridesrC    s     (;V$rL   c                l    t        | t              sJ t        sddlm}m} ddlm} ddlm} t        |    S )Nr>   )cpu_device_op_overridesmps_device_op_overrides)rB  )	r  rm   r    rE  rF  r$  rB  r(  )rl   rE  rF  rB  xpu_op_overridess        rJ   get_device_op_overridesrI    s)    fc"""#F-@#F++rL   zdict[torch.dtype, torch.dtype]DTYPE_TO_COMPUTATION_DTYPEc                r   | t               v rt        j                  S | dv rd|v r|d   S |d   S | dv rt        j                  S | dv rt        j                  S | dk(  rd|v r|d   S |d   S | dk(  rd|v r|d   S |d   S | d	v r$|d   }t
        j                  j                  |      S | d
k(  rd|v r|d   S |d   S y)zK
    Given op name and a list of input dtypes, deduce the output dtype
    )to_dtype
index_exprrr   )randrandn)	get_index	randint64	load_seed	reductionr>   constant)loadstorestore_reductionto_dtype_bitcastN)r!   r   r^   floatint64r1   ru   r   )op_nameargskwargsbuf_names       rJ   deduce_output_dtype_by_namera    s
    +-zz	  
 #*V"3vgAbA	  
 {{	  

 {{	K	")V"3vg@a@	J	")V"3vgAbA	  

 7ww  **	&	&")V"3vgAbArL   c                  `    e Zd Zd
dZddZddZddZddZddZe	dd       Z
e	dd       Zy	)DataTypePropagationc                    || _         d|j                  j                  i| _        |j                  j                         D ]  \  }}|j                  | j                  |<     y Nroot)body
root_blockru   graphs	subblocksitems)r   rg  kvs       rJ   __init__zDataTypePropagation.__init__-  sU    	DOO))B
 NN((* 	%DAqWWDKKN	%rL   c                   |j                   }|D cg c]9  }t        |t        j                  j                        s(|j
                  dk7  s8|; }}t        |      dk(  ry t        d |D              }|sy t        j                  t        j                  |D cg c])  }|j                  t        j                     j                  + c}      S c c}w c c}w )Nplaceholderr   c              3     K   | ]K  }t         j                  |j                  v xr) |j                  t         j                     j                  d u M y wrY   )OptimizationContextkeymetarr   ).0ns     rJ   	<genexpr>zBDataTypePropagation.deduce_node_dtype_by_inputs.<locals>.<genexpr>=  sS      )
   ##qvv- B*../55TAB)
s   AA)all_input_nodesr  r   fxNodeoplenall	functoolsreducepromote_typesrt  rr  rs  rr   )r   nodeinputsrv  input_nodesall_input_nodes_propagateds         rJ   deduce_node_dtype_by_inputsz/DataTypePropagation.deduce_node_dtype_by_inputs5  s    %%
Auxx}}!=!$$-BWA
 
 {q %( )
 !)
 &
"
 *<GHqQVV'++,22H
 	

  Is   )CCC.C
c                b    | j                   |j                     }| j                  |      }|sJ |S rY   )ri  targetpropagate_graph)r   r  	sub_graphrr   s       rJ   deduce_node_dtype_by_subgraphz1DataTypePropagation.deduce_node_dtype_by_subgraphJ  s0    KK,	$$Y/urL   c                   |j                   dk(  ry |j                  dk(  rt        |j                        dk7  ry |j                  t        j
                  k(  r| j                  |j                  d         S t        |j                  t              sJ |j                  j                  d      r| j                  |      S t        |j                  g|j                  i |j                  x}	 |S | j                  |      S )Nrp  outputr>   r   masked_subblock)r{  r  r|  r^  operatorgetitemdeduce_node_dtyper  rm   
startswithr  ra  r_  r  )r   r  output_dtypes      rJ   r  z%DataTypePropagation.deduce_node_dtypeP  s    77m#;;("s499~':;;(***))$))A,77$++s+++;;!!"3455d;; 8 ++ L
   //55rL   c                n   |j                   sJ d }|j                   D ]  }t        j                  |j                  v r|j                  t        j                     }n
t               }| j	                  |      |_        ||j                  t        j                  <   |j                  dk(  s|j
                  } |S )Nr  )nodesrr  rs  rt  r  rr   r  )r   ru   graph_dtyper  opt_ctxs        rJ   r  z#DataTypePropagation.propagate_graphk  s    {{{-1 KK 		,D"&&$))3))$7$;$;<-/ 2248GM18DII)--.{{h&%mm		, rL   c                >    | j                  | j                  d         S re  )r  ri  r   s    rJ   	propagatezDataTypePropagation.propagate}  s    ##DKK$788rL   c                .     | |      j                         S rY   )r  )clsrg  s     rJ   propagate_loopbodyz&DataTypePropagation.propagate_loopbody  s    4y""$$rL   c                    ddl m} ddlm} t	        ||      sJ t	        |j
                  |      sJ t        j                  |j
                        S )Nr   r9   )r=   )	loop_bodyr:   	schedulerr=   r  _bodyrc  r  )r  r  r:   r=   s       rJ   propagate_scheduler_nodez,DataTypePropagation.propagate_scheduler_node  sA    (-$...$**h///"55djjAArL   N)rg  r:   r]   None)r  torch.fx.Noder]   Optional[torch.dtype])r  r  r]   rq   )ru   ztorch.fx.Graphr]   r  )r]   r  )rg  r:   r]   r  )r  r=   r]   r  )r_   r`   ra   rn  r  r  r  r  r  classmethodr  r  rd   rL   rJ   rc  rc  ,  sJ    %
*66$9 % % B BrL   rc  c                  6     e Zd Zddd	 	 	 	 	 	 	 d fdZ xZS )r   T)simplifypc                   |r]t        |t        j                        rCt        t        j
                  d      r)t        j
                  j                  j                  |      }t        | %  |      S )Nsizevars)
r  r   Exprhasattrr1   ru   r  r  superdoprint)r   r   r  r  	__class__s       rJ   r  zPythonPrinter.doprint  sK     
44*9U77##,,T2Dwt$$rL   )r   rh   r  r^   r  r^   r]   rm   )r_   r`   ra   r  __classcell__r  s   @rJ   r   r     s2    48D%%-1%=A%	% %rL   r   c                  T   e Zd ZdZedd       Zedd       Zedd       Zedd       Zedd       Z	edd       Z
edd       Zedd	       Zedd
       Zedd       Zedd       Zedd       Zedd       Zedd       Zedd       Zedd       Zedd       Zedd       Zy)OpDecompositionsz!
    Decomposes inductor ops
    c                    | S rY   rd   )values    rJ   identityzOpDecompositions.identity  s	     rL   c                r    t        j                  t        j                  dt        j                        |       S Nr>   )r,   truedivrU  r   int32xs    rJ   
reciprocalzOpDecompositions.reciprocal  s"    {{3<<5;;7;;rL   c                .    t        j                  | |       S rY   )r,   mulr  s    rJ   squarezOpDecompositions.square  s    wwq!}rL   c                    t        j                  t        j                  dt        j                        t        j
                  |             S r  )r,   subrU  r   float32erfr  s    rJ   erfczOpDecompositions.erfc  s*    wws||Au}}5swwqzBBrL   c                    t        j                  t        j                  t        j                  |             t        j                  |             S rY   )r,   r  expr  r  r  s    rJ   erfcxzOpDecompositions.erfcx  s,    wwswwszz!}-sxx{;;rL   c                    t        j                  t        j                  |       t        j                  dt        j
                              S r  )r,   r  r  rU  r   r  r  s    rJ   expm1zOpDecompositions.expm1  s*    wwswwqz3<<5==#ABBrL   c           	         t        j                  t        j                  |       t        j                  dt	        j                  d      z  t
        j                              S )Nr>   
   r,   r  logrU  mathr   r  r  s    rJ   log10zOpDecompositions.log10  s7    wwswwqz3<<DHHRL0@%--#PQQrL   c           	         t        j                  t        j                  |       t        j                  dt	        j                  d      z  t
        j                              S )Nr>   r   r  r  s    rJ   log2zOpDecompositions.log2  s6    wwswwqz3<<DHHQK#OPPrL   c           
         t        j                  t        j                  | t        j                  t	        j
                  d      t        j                                    S )Nr   )r,   r  r  rU  r  r  r   r  r  s    rJ   exp2zOpDecompositions.exp2  s3    wwswwq#,,txx{EMM"JKLLrL   c           	         t        j                  t        j                  | t        j                  dt        j
                                    S r  )r,   r  addrU  r   r  r  s    rJ   log1pzOpDecompositions.log1p  s+    wwswwq#,,q%++">?@@rL   c                    t        j                  dt        j                        }t        j                  |t        j
                  |t        j                  t        j                  |                         S r  )r,   rU  r   r  r  r  r  negr  ones     rJ   sigmoidzOpDecompositions.sigmoid  sC    ll1ekk*{{3SWWSWWQZ-@ ABBrL   c                r    t        j                  | t        j                  dt        j                              S Nr   )r,   r   rU  r   r  r  s    rJ   reluzOpDecompositions.relu  s"    {{1cll1ekk:;;rL   c                V    t        j                  t        j                  | |      |      S rY   )r,   r  r  r  yzs      rJ   fmazOpDecompositions.fma  s     wwswwq!}a((rL   c                T    t        j                  t        j                  |       |      S rY   )r,   rL  floorrU   rr   s     rJ   floor_to_intzOpDecompositions.floor_to_int      ||CIIaL%00rL   c                T    t        j                  t        j                  |       |      S rY   )r,   rL  ceilr  s     rJ   ceil_to_intzOpDecompositions.ceil_to_int  s    ||CHHQK//rL   c                T    t        j                  t        j                  |       |      S rY   )r,   rL  truncr  s     rJ   trunc_to_intzOpDecompositions.trunc_to_int  r  rL   c           	        t        j                  | |      }t        j                  t        j                  |t        j                  dt
        j                              t        j                  t        j                  |      t        j                  |                  }t        j                  |t        j                  ||      |      S r  )
r,   modand_nerU  r   r  signbitwherer  )rU   rV   rconds       rJ   	remainderzOpDecompositions.remainder  sy    GGAqMxxFF1cll1ekk23FF3;;q>3;;q>2
 yyswwq!}a00rL   c                T    t        j                  t        j                  |       |      S rY   )r,   rL  roundr  s     rJ   round_to_intzOpDecompositions.round_to_int  r  rL   N)r  OpVarTr]   r  r  r  r]   r  )r  r  r  r  r  r  r]   r  )rU   r  rr   rq   r]   r  rU   r  rV   r  r]   r  )r_   r`   ra   r   rc   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rd   rL   rJ   r  r    s}      < <   C C < < C C R R Q Q M M A A C C < < ) ) 1 1 0 0 1 1 1 1 1 1rL   r  z[a-z0-9_.]+|\([^)]*\)|)flagsc                    | d   dk7  st        |       dk  ryd}t        | dd        D ]3  \  }}|dk(  r|dz  }n
|dk(  r|dz  }|dk(  s!|t        |       dz
  k7  s3 y |dk(  sJ y)Nr   (r   Fr>   rQ   T)r|  	enumerate)stringri   ichars       rJ   _all_in_parensr    s    ayC3v;?EVABZ( 43;QJES[QJEA:!s6{Q. A::rL   c                     e Zd Zed'd       Zed(d       Zed)d       Zed)d       Zed)d       Zed)d       Z	ed)d       Z
ed)d       Zed)d	       Zed)d
       Zed*d       Zed+d       Zed+d       Zed+d       Zed+d       Zed+d       Zed,d       Zed-d       Z	 	 d.	 	 	 	 	 	 	 	 	 d/dZ	 	 	 	 	 	 	 	 	 	 d0dZd1dZ	 d2	 	 	 	 	 	 	 	 	 d3dZd4dZ	 	 	 	 	 	 	 	 	 	 d5dZ	 	 	 	 	 	 	 	 d6dZ	 	 	 	 	 	 	 	 	 	 d7dZ	 	 d8	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d9dZd:dZde jB                  ddd 	 	 	 	 	 	 	 	 	 	 	 	 	 d;d!Z"d<d"Z#d=d#Z$ed>d$       Z%e&d?d%       Z'e&d@d&       Z(y)AOpOverridesc                r    t        | t              s t        j                  |       st	        |       r| S d|  dS Nr   rQ   )r  CSEVariable_RE_PAREN_NOT_NEEDED	fullmatchr  )r  s    rJ   parenzOpOverrides.paren  s9     v{+#--f5f% M6(!}rL   c                    t        |       S rY   )repr)r  rr   s     rJ   rU  zOpOverrides.constant  s    E{rL   c                    t        j                  dt        j                        }t        j                  |t        j
                  |t        j                  t        j                  |                         S r  )r,   rU  r   r  r  r  libdevice_expr  r  s     rJ   libdevice_sigmoidzOpOverrides.libdevice_sigmoid  sE    ll1ekk*{{3S->->swwqz-J KLLrL   c                ,    t        j                  |       S rY   )r,   absr  s    rJ   libdevice_abszOpOverrides.libdevice_abs      wwqzrL   c                ,    t        j                  |       S rY   )r,   sqrtr  s    rJ   libdevice_sqrtzOpOverrides.libdevice_sqrt  s    xx{rL   c                ,    t        j                  |       S rY   )r,   cosr  s    rJ   libdevice_coszOpOverrides.libdevice_cos  r  rL   c                ,    t        j                  |       S rY   )r,   sinr  s    rJ   libdevice_sinzOpOverrides.libdevice_sin!  r  rL   c                ,    t        j                  |       S rY   )r,   r  r  s    rJ   libdevice_logzOpOverrides.libdevice_log%  r  rL   c                ,    t        j                  |       S rY   )r,   r  r  s    rJ   r  zOpOverrides.libdevice_exp)  r  rL   c                2    dt         j                  |        S )N~r  r  r  s    rJ   bitwise_notzOpOverrides.bitwise_not-  s    ;$$Q'())rL   c                2    t         j                  |        dS )Nz == 0r%  )rU   s    rJ   logical_notzOpOverrides.logical_not1  s    ##A&'u--rL   c                \    t         j                  |        dt         j                  |       S )Nz & r%  r  r  s     rJ   bitwise_andzOpOverrides.bitwise_and5  +    ##A&'s;+<+<Q+?*@AArL   c                \    t         j                  |        dt         j                  |       S )Nz | r%  r*  s     rJ   
bitwise_orzOpOverrides.bitwise_or9  r,  rL   c                \    t         j                  |        dt         j                  |       S )Nz ^ r%  r*  s     rJ   bitwise_xorzOpOverrides.bitwise_xor=  r,  rL   c                \    t         j                  |        dt         j                  |       S )Nz << r%  r*  s     rJ   bitwise_left_shiftzOpOverrides.bitwise_left_shiftA  +    ##A&'tK,=,=a,@+ABBrL   c                \    t         j                  |        dt         j                  |       S )Nz >> r%  r*  s     rJ   bitwise_right_shiftzOpOverrides.bitwise_right_shiftE  r3  rL   c                .    t        j                  | |      S rY   )r,   r  rT   s     rJ   int_truedivzOpOverrides.int_truedivI  s    
 {{1a  rL   c                T    t        j                  | t        j                  |            S rY   )r,   rV  r   Integer)r   r   s     rJ   rS  zOpOverrides.load_seedP  s    xxemmF344rL   Tc                *    t        t        |            S rY   )r(   rm   )r   varr   checkwrap_negs        rJ   indirect_indexingzOpOverrides.indirect_indexingT  s     "#c(++rL   c                D    t        t        |       j                   d      )Nz,: check_bounds should be handled by CSEProxyrS   r  r_   r   r   r   loweruppers        rJ   check_boundszOpOverrides.check_bounds]  s'     "Dz""##OP
 	
rL   c                D    t        t        |       j                   d      )Nz$: load should be handled by CSEProxyr@  r   r   r=  s      rJ   rV  zOpOverrides.loadd  s%    !Dz""##GH
 	
rL   Nc                D    t        t        |       j                   d      )Nz%: store should be handled by CSEProxyr@  r   r   r=  r  modes        rJ   rW  zOpOverrides.storei  s'     "Dz""##HI
 	
rL   c                D    t        t        |       j                   d      )Nz/: store_reduction should be handled by CSEProxyr@  r   r   r=  r  s       rJ   rX  zOpOverrides.store_reductionp  s%    !Dz""##RS
 	
rL   c                D    t        t        |       j                   d      )Nz): reduction should be handled by CSEProxyr@  r   rr   	src_dtypereduction_typer  s        rJ   rT  zOpOverrides.reductionu  s'     "Dz""##LM
 	
rL   c                D    t        t        |       j                   d      )Nz$: scan should be handled by CSEProxyr@  r   dtypes
combine_fnvaluess       rJ   scanzOpOverrides.scan  s'     "Dz""##GH
 	
rL   c                D    t        t        |       j                   d      )Nz$: sort should be handled by CSEProxyr@  r   rR  rT  stable
descendings        rJ   sortzOpOverrides.sort  s'     "Dz""##GH
 	
rL   c                D    t        t        |       j                   d      )Nz): bucketize should be handled by CSEProxyr@  r   rT  
boundariesboundary_indicesindexing_dtyperightsortersorter_indicess           rJ   	bucketizezOpOverrides.bucketize  s'     "Dz""##LM
 	
rL   c                D    t        t        |       j                   d      )Nz2: halide_clamp only implemented for Halide backendr@  )r   r  r   r<  s       rJ   halide_clampzOpOverrides.halide_clamp  s%    !Dz""##UV
 	
rL   r>   )constraintsrr   is_purepackc               D    t        t        |       j                   d      )Nz<: inline_asm_elementwise only implemented for Triton backendr@  )r   asmrf  rr   rg  rh  r  s          rJ   inline_asm_elementwisez"OpOverrides.inline_asm_elementwise  s'     "Dz""##_`
 	
rL   c                D    t        t        |       j                   d      )Nz.: ops.output should not appear at codegen timeAssertionErrorr  r_   )r   r^  s     rJ   r  zOpOverrides.output  s%    Dz""##QR
 	
rL   c                D    t        t        |       j                   d      )Nz3: ops.placeholder should not appear at codegen timerm  r   r=  s     rJ   rp  zOpOverrides.placeholder  s%    Dz""##VW
 	
rL   c                0     d fd} |_         d|_        |S )Nc                J    t        t        |       j                   d       )Nz does not implement ops.r@  )r   r^  r_  r   s      rJ   unimplementedz1OpOverrides._unimplemented.<locals>.unimplemented  s*    %:&&''?vF rL   T)r   r  r^  r   r_  r   r]   r  )r_   is_unimplemented)r   rs  s   ` rJ   _unimplementedzOpOverrides._unimplemented  s     	
 "&)-&rL   c                p    t        | |d       }t        t        |d       }| xs ||k(  xs t        |dd      S )Nrt  F)getattrr-   )r  r   fn
default_fns       rJ   _is_unimplementedzOpOverrides._is_unimplemented  s?    S$%Zt4
vSz)SWR9KU-SSrL   c                P   |dv sJ |       t         j                         D ]  \  }}t        ||      }|/| j                  |      s&t	        | || j                  |             C|| j                  vsJ d| d| j                          ||_        t	        | |t        |              y )N)r  r  cppvecr  r)  zmultiple definitions of z on )	pointwise_overrides_datark  rw  rz  setattrru  __dict__r_   rc   )r  r  funcnamedataimpls        rJ   _initialize_pointwise_overridesz+OpOverrides._initialize_pointwise_overrides  s    EEMvME6<<> 
	;NHd4(D|((2C3+=+=h+GHs||3 .xjS\\NK3 !)X|D'9:
	;rL   )r  r  r]   r  )r  zUnion[bool, float, int]rr   rq   r]   r  r  )rU   r  r]   r  )r  r  r  r  r]   r  r  )r   rm   r   r  r]   r  TT)
r;  r  r   Union[sympy.Expr, int]r<  r^   r=  r^   r]   sympy.Symbol
r   rh   r   rh   rB  r^   rC  r^   r]   r  )r   rm   r=  rh   r]   r  rY   )
r   rm   r=  rh   r  r  rI  r0   r]   r  )r   rm   r=  rh   r  r  r]   r  )
rr   rq   rN  rq   rO  r/   r  !Union[OpVarT, tuple[OpVarT, ...]]r]   r  )rR  tuple[torch.dtype, ...]rS  zFCallable[[tuple[OpVarT, ...], tuple[OpVarT, ...]], tuple[OpVarT, ...]]rT  tuple[OpVarT, ...]r]   r  )
rR  r  rT  r  rX  r^   rY  r^   r]   r  NN)rT  r  r]  .tuple[str, sympy.Expr, sympy.Expr, sympy.Expr]r^  r  r_  rq   r`  r^   ra   Optional[tuple[str, sympy.Expr]]rb  zOptional[OpVarT]r]   r  )r  r  r   rh   r<  r^   r]   r  )r  r  rj  rm   rf  r   rr   rq   rg  r^   rh  r   r]   r  )r^  r  r]   r  )r=  r   r]   r  )r   rm   r]   zCallable[..., OpVarT]r   rm   r]   r^   )r  rm   r]   r  ))r_   r`   ra   rc   r  rU  r  r  r  r  r  r!  r  r&  r(  r+  r.  r0  r2  r5  r7  rS  r>  rD  rV  rW  rX  rT  rU  rZ  rc  re  r   r  rk  r  rp  ru  r  rz  r  rd   rL   rJ   r  r     s       M M             * * . . B B B B B B C C C C ! ! 5 5 ,, %, 	,
 , 
,

&0
9=
FJ
	

 NR

 *
39
AJ
	


	
	
 	
 &		

 1	
 
+	

'


 #
 

	
'	
 #	
 		

 	
 
	
$ 48+/

 C
 !	

 $
 
 1
 )
 


 &*"]]

 
 #	

 
 
 
 





   T T
 ; ;rL   r  c                  |    e Zd ZU ded<   ded<   dZded<   dZded<   ej                  Zd	ed
<   dZ	ded<   dZ
ded<   y)OverridesDatarm   r   zCallable[..., str]r  NzOptional[Callable[..., str]]r  r|  r   type_promotion_kindr  r)  )r_   r`   ra   r   r  r|  r   DEFAULTr  r  r)  rd   rL   rJ   r  r    sQ    
I	+/F(/+/F(/'// 8  ,0F(/(,C	%,rL   r  airy_aic                    d|  dS )Nzairy_ai_forward(rQ   rd   r  s    rJ   r#  r#    s    (1- rL   special_airy_ai)r  r  r   	bessel_j0c                    d|  dS )Nzbessel_j0_forward(rQ   rd   r  s    rJ   r#  r#        *1#Q/ rL   c                    d|  dS )Nzlibdevice.j0(rQ   rd   r  s    rJ   r#  r#        =1- rL   special_bessel_j0)r  r  r  r   	bessel_j1c                    d|  dS )Nzbessel_j1_forward(rQ   rd   r  s    rJ   r#  r#    r  rL   c                    d|  dS )Nzlibdevice.j1(rQ   rd   r  s    rJ   r#  r#    r  rL   special_bessel_j1	bessel_y0c                    d|  dS )Nzbessel_y0_forward(rQ   rd   r  s    rJ   r#  r#  	  r  rL   c                    d|  dS )Nzlibdevice.y0(rQ   rd   r  s    rJ   r#  r#  
  r  rL   special_bessel_y0	bessel_y1c                    d|  dS )Nzbessel_y1_forward(rQ   rd   r  s    rJ   r#  r#    r  rL   c                    d|  dS )Nzlibdevice.y1(rQ   rd   r  s    rJ   r#  r#    r  rL   special_bessel_y1digammac                    d|  dS )Nzcalc_digamma(rQ   rd   r  s    rJ   r#  r#    s    aS* rL   c                    |  dS )Nz
.digamma()rd   r  s    rJ   r#  r#    s    A3j) rL   )r  r  r|  r   r  c                    d|  dS )Nzcalc_erfcx(rQ   rd   r  s    rJ   r#  r#        A3a( rL   c                    d|  dS )Nzlibdevice.erfcx(rQ   rd   r  s    rJ   r#  r#    s    +A3a0 rL   special_erfcxr  c                    d|  d| d| dS )Nz	std::fma(rP   rQ   rd   r  s      rJ   r#  r#  #  s    is"QCr!A6 rL   c                    d|  d| d| dS )Nzfmadd(rP   rQ   rd   r  s      rJ   r#  r#  $  s    s"QCr!A6 rL   c                    d|  d| d| dS )Nzlibdevice.fma(rP   rQ   rd   r  s      rJ   r#  r#  %  s    s"QCr!A> rL   )r  r  r|  r  r   igammac                    d|  d| dS Nzcalc_igamma(rP   rQ   rd   r*  s     rJ   r#  r#  +      <s"QCq1 rL   igammacc                    d|  d| dS Nzcalc_igammac(rP   rQ   rd   r*  s     rJ   r#  r#  0      =2aS2 rL   gammaincc                    d|  d| dS r  rd   r*  s     rJ   r#  r#  5  r  rL   special_gammainc	gammainccc                    d|  d| dS r  rd   r*  s     rJ   r#  r#  :  r  rL   special_gammaincci0c                    d|  dS )Nzcalc_i0(rQ   rd   r  s    rJ   r#  r#  ?      1o rL   c                    d|  dS Nzlibdevice.cyl_bessel_i0(rQ   rd   r  s    rJ   r#  r#  @      3A3a8 rL   c                    |  dS )Nz.i0()rd   r  s    rJ   r#  r#  A  s    A3e rL   )r  r  r  r|  r   i0ec                    d|  dS )Nz	calc_i0e(rQ   rd   r  s    rJ   r#  r#  F      	!A& rL   c                    |  dS )Nz.i0e()rd   r  s    rJ   r#  r#  G  s    A3f rL   special_i0ei1c                    d|  dS )Nzcalc_i1(rQ   rd   r  s    rJ   r#  r#  L  r  rL   c                    d|  dS Nzlibdevice.cyl_bessel_i1(rQ   rd   r  s    rJ   r#  r#  M  r  rL   
special_i1i1ec                    d|  dS )Nz	calc_i1e(rQ   rd   r  s    rJ   r#  r#  R  r  rL   special_i1elog_ndtrc                    d|  dS )Nzcalc_log_ndtr(rQ   rd   r  s    rJ   r#  r#  W  s    qc+ rL   special_log_ndtrmodified_bessel_i0c                    d|  dS )Nzmodified_bessel_i0_forward(rQ   rd   r  s    rJ   r#  r#  ]      3A3a8 rL   c                    d|  dS r  rd   r  s    rJ   r#  r#  ^  r  rL   special_modified_bessel_i0modified_bessel_i1c                    d|  dS )Nzmodified_bessel_i1_forward(rQ   rd   r  s    rJ   r#  r#  c  r  rL   c                    d|  dS r  rd   r  s    rJ   r#  r#  d  r  rL   special_modified_bessel_i1modified_bessel_k0c                    d|  dS )Nzmodified_bessel_k0_forward(rQ   rd   r  s    rJ   r#  r#  i  r  rL   special_modified_bessel_k0modified_bessel_k1c                    d|  dS )Nzmodified_bessel_k1_forward(rQ   rd   r  s    rJ   r#  r#  n  r  rL   special_modified_bessel_k1ndtrc                    d|  dS )Nz
calc_ndtr(rQ   rd   r  s    rJ   r#  r#  t  s    
1#Q' rL   special_ndtrndtric                    d|  dS )Nzcalc_ndtri(rQ   rd   r  s    rJ   r#  r#  y  r  rL   special_ndtri	polygammac                    |  d| d| d|  dS )Nz == 0 ? calc_digamma(z) : calc_polygamma(rP   rQ   rd   r*  s     rJ   r#  r#  ~  s"    A33A36I!BqcQRS rL   scaled_modified_bessel_k0c                    d|  dS )Nz"scaled_modified_bessel_k0_forward(rQ   rd   r  s    rJ   r#  r#        :1#Q? rL   !special_scaled_modified_bessel_k0scaled_modified_bessel_k1c                    d|  dS )Nz"scaled_modified_bessel_k1_forward(rQ   rd   r  s    rJ   r#  r#    r  rL   !special_scaled_modified_bessel_k1spherical_bessel_j0c                    d|  dS )Nzspherical_bessel_j0_forward(rQ   rd   r  s    rJ   r#  r#    s    4QCq9 rL   special_spherical_bessel_j0zetac                    d|  d| dS )Nzzeta(rP   rQ   rd   r*  s     rJ   r#  r#    s    52aS* rL   special_zetachebyshev_polynomial_tc                    d|  d| dS )Nzchebyshev_polynomial_t_forward(rP   rQ   rd   r*  s     rJ   r#  r#        :1#Rs!D rL   special_chebyshev_polynomial_tchebyshev_polynomial_uc                    d|  d| dS )Nzchebyshev_polynomial_u_forward(rP   rQ   rd   r*  s     rJ   r#  r#    r  rL   special_chebyshev_polynomial_uchebyshev_polynomial_vc                    d|  d| dS )Nzchebyshev_polynomial_v_forward(rP   rQ   rd   r*  s     rJ   r#  r#    r  rL   special_chebyshev_polynomial_vchebyshev_polynomial_wc                    d|  d| dS )Nzchebyshev_polynomial_w_forward(rP   rQ   rd   r*  s     rJ   r#  r#    r  rL   special_chebyshev_polynomial_wlegendre_polynomial_pc                    d|  d| dS )Nzlegendre_polynomial_p_forward(rP   rQ   rd   r*  s     rJ   r#  r#        9!BqcC rL   special_legendre_polynomial_pshifted_chebyshev_polynomial_tc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_t_forward(rP   rQ   rd   r*  s     rJ   r#  r#        B1#Rs!L rL   &special_shifted_chebyshev_polynomial_tshifted_chebyshev_polynomial_uc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_u_forward(rP   rQ   rd   r*  s     rJ   r#  r#    r  rL   &special_shifted_chebyshev_polynomial_ushifted_chebyshev_polynomial_vc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_v_forward(rP   rQ   rd   r*  s     rJ   r#  r#    r  rL   &special_shifted_chebyshev_polynomial_vshifted_chebyshev_polynomial_wc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_w_forward(rP   rQ   rd   r*  s     rJ   r#  r#    r  rL   &special_shifted_chebyshev_polynomial_whermite_polynomial_hc                    d|  d| dS )Nzhermite_polynomial_h_forward(rP   rQ   rd   r*  s     rJ   r#  r#    s    82aSB rL   special_hermite_polynomial_hhermite_polynomial_hec                    d|  d| dS )Nzhermite_polynomial_he_forward(rP   rQ   rd   r*  s     rJ   r#  r#    r
  rL   special_hermite_polynomial_helaguerre_polynomial_lc                    d|  d| dS )Nzlaguerre_polynomial_l_forward(rP   rQ   rd   r*  s     rJ   r#  r#    r
  rL   special_laguerre_polynomial_lzdict[str, OverridesData]r}  c                     t         fdt        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j
                  fD              S )Nc              3  &   K   | ]  }|v  
 y wrY   rd   )ru  r  r   s     rJ   rw  z$is_buffer_removed.<locals>.<genexpr>  s       		   )anyr1   ru   removed_bufferskernelinplaced_to_remover   s   `rJ   is_buffer_removedr*    sU      GG##HH$$GG&&HH''	
  rL   c                  4     e Zd ZdZd fdZddZddZ xZS )DeferredLinezHA line that can be 'unwritten' by adding name to V.graph.removed_buffersc                V    t         |   |       || _        t        |t              rJ y rY   )r  rn  r   r  r"   )r   r   liner  s      rJ   rn  zDeferredLine.__init__  s+    	d$45555rL   c                F    t        | j                        s| j                  S y rY   )r*  r   r.  r   s    rJ   __call__zDeferredLine.__call__  s     +99rL   c                .    t        | j                  |      S rY   )r,  r   )r   r.  s     rJ   	_new_linezDeferredLine._new_line  s    DIIt,,rL   )r   rm   r.  rm   r   )r.  rm   r]   r,  )r_   r`   ra   r   rn  r0  r2  r  r  s   @rJ   r,  r,    s    R6

-rL   r,  c                      e Zd ZdddZy)BracesBufferc                H     t         j                  d fd       } |       S )Nc               3    K   t              D ](  } j                  d       xj                  dz  c_        * t               D ](  } xj                  dz  c_        j                  d       * d  t               D ](  } j                  d       xj                  dz  c_        * t              D ](  } xj                  dz  c_        j                  d       * y w)N{r>   })range	writeline_indent)_r   r   s    rJ   ctxz BracesBuffer.indent.<locals>.ctx  s     6] "s#!" F7^ $!s#$ F7^ "s#!" 6] $!s#$s   C C#)r]   Iterator[None])
contextlibcontextmanager)r   r   r=  s   `` rJ   indentzBracesBuffer.indent  s$    		"	"	$ 
#	$ urL   N)r>   )r   r   r]   z'contextlib.AbstractContextManager[None])r_   r`   ra   rA  rd   rL   rJ   r4  r4    s    rL   r4  c                  "    e Zd ZU ded<   ded<   y)InplacedBufferrm   rp   r   other_namesNr   rd   rL   rJ   rC  rC  	  s    OrL   rC  c                  .    e Zd ZU ded<   dZded<   ddZy)	ArgNamerm   r   Fr^   is_constexprc                B    | j                    | j                  rd S d S )Nz : tl.constexprrG  )r   rG  r   s    rJ   	full_namezArgName.full_name  s*    ))$2C2C.LMMLMMrL   Nr   )r_   r`   ra   r   rG  rI  rd   rL   rJ   rF  rF    s    
IL$NrL   rF  c                      e Zd ZddZy)
RemovedArgc                     y)NREMOVEDrd   r   s    rJ   __str__zRemovedArg.__str__  s    rL   Nr   )r_   r`   ra   rN  rd   rL   rJ   rK  rK    s    rL   rK  c                      e Zd Ze	 	 	 	 	 	 	 	 dd       ZddZddZedd       ZddZddZ	ddZ
ddZdd	Zdd
ZddZd dZd!dZd"dZd#dZd$dZ	 	 d%dZd&dZd'dZd(dZy))
KernelArgsc                ~    |j                  |t              }t        |t              r|  t	        |       x||<   }|S |S rY   )getrM  r  rK  r|  )rw   odictr   result
new_results        rJ   _lookupzKernelArgs._lookup!  sD     */4)Afj)*0#e*'>>E$K*rL   c                J    i | _         i | _        i | _        i | _        g | _        y rY   )input_buffersoutput_buffersinplace_buffersr  workspace_argsr   s    rJ   rn  zKernelArgs.__init__-  s)    -/ACMO/124rL   c                    dj                  dj                  t        t        | j                  | j
                  | j                  | j                  g                  S )NzKernelArgs({})rP   )formatr}   mapr  rX  rY  rZ  r  r   s    rJ   __repr__zKernelArgs.__repr__4  sS    &&II**++,,	

 	
rL   c                "    t        | t              S rY   )r  rK  r)  s    rJ   _buffer_is_marked_removedz$KernelArgs._buffer_is_marked_removedC  s     $
++rL   c                :   t         j                  j                  r4t         j                  j                  j                  j	                  ||      }|t         j                  j
                  vsJ |       || j                  v rt        t        | j                  |         S || j                  v r't        t        | j                  |         j                  S |j                  d      r| j                  d| j                  |      S | j                  d| j                  |      S )Nseedin_ptr)r1   ru   r  mutation_real_namerR  r&  rY  r	   rm   rZ  rC  rp   r  rV  rX  r   s     rJ   inputzKernelArgs.inputH  s    7777$$77;;D$GD1772228D824&&&T006774'''(<(<T(BCNNN??6"<<(:(:DAA||Hd&8&8$??rL   c                   t         j                  j                  r4t         j                  j                  j                  j	                  ||      }|t         j                  j
                  vsJ |       || j                  v r't        t        | j                  |         j                  S | j                  d| j                  |      S )Nout_ptr)r1   ru   r  re  rR  r&  rZ  r	   rC  rp   rV  rY  r   s     rJ   r  zKernelArgs.outputT  s    7777$$77;;D$GD1772228D824'''(<(<T(BCNNN||It':':DAArL   c                f   || j                   vsJ || j                   v rL| j                   |   }t        |t              rJ |j                  j	                  |       || j                   |<   y | j                   j                         D cg c]  }t        |t              s| }}| j                   j                         D cg c]  }t        |t              r| }}t        t        |            t        |      z   }t        d| ||g      }|| j                   |<   || j                   |<   y c c}w c c}w )N
in_out_ptr)	rZ  r  rK  rD  appendrT  r|  r+   rC  )r   
input_nameoutput_namebufvalalive_buffersr&  inplace_buffer_idxs           rJ   make_inplacezKernelArgs.make_inplace\  s<   $"6"6666---&&z2C!#z222OO"";/03D  -  //668!#z2 M   //668c:. O 
 "%VM%:!;c/>R!R /01[)C 03D  ,03D  -!
s   D)>D.c                @   t        |t        j                  |      t        j                  j                         t         j                               }t        | j                        D ]  \  }}t         j                  ||      r?|j                  }t         j                  ||      | j                  |<   |j                  |fc S |j                  |j                  k7  r|j                  |j                  k7  rJ  | j                  j                  |       |j                  dfS )a  
        Allocate or extend a workspace buffer of nbytes bytes.

        This function manages the allocation of a workspace buffer. It either creates
        a new WorkspaceArg or extends an existing one.

        Note:
        - Calling this function will in-place mutate the args by adding or updating
        a WorkspaceArg.
        - The codegen for generating the Python argdefs and call_defs will check
        this field and allocate the buffer accordingly.
        - A new argument "ws_ptr" will be present in the generated code.

        Args:
            nbytes (sympy.Expr): The number of bytes to allocate.
            zero_fill (bool): Whether to initialize the buffer to zero.

        Returns:
            Tuple[str, int]: A tuple containing:
                - "ws_ptr": A string identifier for the workspace pointer.
                - offset: An integer representing the byte offset in the workspace.
        )ri   rj   rl   rn   r   )rg   rN   r\   r1   ru   get_current_device_or_throwrx   r  r[  rz   ri   r}   rp   rn   rk  )r   nbytesr[   argr  existing_argr   s          rJ   	workspacezKernelArgs.workspacev  s    . '11)<77668#//1	
  ))<)<= 	OA|$$\37%++)5):):<)M##A&#..66''3>>9 ++s~~=>	 	""3'~~q  rL   c           
     ~   t         j                  j                         }t        |t        j
                  t        j                  dd|j                   d|j                   |      }| j                  D ]#  }|j                  |j                  k(  s||k(  r#J  | j                  j                  |       |j                  S )a  
        Lazily allocate a graph-wide semaphores buffer with at least min_size.  This is a single buffer shared by
        all kernels and zero initialized once at graph start.  Each kernel must leave the buffer zeroed on exit.

        Warning: multiple calls to this function will return the same buffer.

        Args:
            min_size: the number of int32 semaphores required

        Returns:
            name of the semaphores buffer
        sem_ptrsemaphores_r<  )ri   rj   rr   rp   rn   rl   )r1   ru   rt  rg   rN   rb   r   uint32r  r=  r[  rp   rk  )r   min_sizecurrent_devicerv  rw  s        rJ   
semaphoreszKernelArgs.semaphores  s     <<>'66,, $^%8%8$9>;O;O:PQ!
 !// 	+L&&#..8l***	+ 	""3'~~rL   c                f   t        |t              sJ t        |      |f       t        j                  |      }|| j
                  v r| j
                  |   S | j
                  j                         v r0 t        fd| j
                  j                         D               | j
                  |<   S )Nc              3  F   K   | ]  }|j                        sd   yw)r>   N)r  )ru  rm  r   s     rJ   rw  z)KernelArgs.seed_offset.<locals>.<genexpr>  s     U1!,,tBTQUs   !!)r  r   r  r   r9  r  rT  sum)r   r   r  s    ` rJ   seed_offsetzKernelArgs.seed_offset  s    %%;UU';;%e$DMM!==''4==''))&U(<(<(>UUVW   $erL   c                    t        |t        j                        sJ t        |      |f       |j                  dk(  rd| j
                  |<   y| j                  d| j
                  |      S )Nrc  ks)r  r   Symbolr  r   r  rV  r   s     rJ   r   zKernelArgs.size  sX    $-AT
D/AA-99"(DMM$||D$--66rL   c                    t        | j                  j                         | j                  j                         | j                  j                               S rY   )r   rX  keysrY  r  r   s    rJ   
call_nameszKernelArgs.call_names  sA    ##%t':':'?'?'A4==CUCUCW
 	
rL   c                   | j                   j                  |d      }|t        |t              s|j                  S | j
                  j                  |d      }|t        |t              s|S | j                  j                  |d      S )z;
        Returns inner name of a given outer name.
        N)rZ  rR  r  rK  rp   rY  rX  )r   r   inplacedrm  s       rJ   arg_namezKernelArgs.arg_name  s}     ''++D$7
8Z(H&&&))--dD9":k:+N!!%%dD11rL   c                    |S rY   rd   )r   rn  rr   s      rJ   wrap_ptr_argzKernelArgs.wrap_ptr_arg  s    
rL   c                    t        |      S rY   )rm   )r   r   s     rJ   wrap_size_argzKernelArgs.wrap_size_arg  s    4yrL   c                   ddl m}m} g }g }g }t        | j                  j                               D ]  }t        |t              r|j                  d   }|j                  }t        j                  j                  |      }	||	   }
|j                  |
 d|        |j                  | j                  ||	             |j                  |
 d        | j                  j!                         D ]  \  }}|| j                  v rt        j                  j                  |      }	||	   }
|j                  d|
 d|        |j                  | j                  ||	             |j                  d|
 d        | j"                  j!                         D ]  \  }}|| j                  v st        |t              r%t        j                  j                  |      }	||	   }
|j                  |
 d|        |j                  | j                  ||	             |j                  |
 d        | j$                  j!                         D ]  \  }}|j                  d| d|        |j                  | j'                  |             |j                  d|        t        j                  j(                  slt        j                  j(                  j+                  |        | j,                  rJ d       |||fS )	Nr>   )DTYPE_TO_CPP
INDEX_TYPErN  z* *zconst  zWorkspace not supported on CPU )	cpp_utilsr  r  r+   rZ  rT  r  rK  rD  rp   r1   ru   r   rk  r  rX  rk  rY  r  r  wrapper_codeensure_size_computedr[  )r   r  r  	call_argsarg_defs	arg_typesr  outerinnerrr   	cpp_dtypemaybe_inners               rJ   cpp_argdefszKernelArgs.cpp_argdefs  s   7		t33::<= 		.H(J/((,E''EGG%%e,E$U+IOOykE734T..ue<=	{!_-		. !..446 	4LE5,,,GG%%e,E$U+IOOfYKr%9:T..ue<=vi[23	4 #'"5"5";";"= 	.E;,,,
;
0SGG%%e,E$U+IOOykK=9:T..ue<=	{!_-	. !MM//1 	ALE5OOfZL%9:T//67vj\23ww##$$99%@	A &&I(II&I--rL   c                   g }g }g }g }t        | j                  j                               D ]  }t        |t              r|j                  t        |j                               |j                  |j                  d          |j                  t        j                  j                  |j                  d                |j                  t        |j                  |j                  d   t        j                  j                  |j                  d                       t        | j                  j                         | j                   j                               D ]  \  }}|| j                  v st        |t              r%|j                  t        |             |j                  |       |j                  t        j                  j                  |             |j                  t        ||t        j                  j                  |                    | j"                  j                         D ]  \  }}|j                  t        |             |j                  |       |j                  t%        |             |j                  t'        ||             t        j                  j(                  st        j                  j(                  j+                  |        | j,                  D ]m  }|j                  t        |j                               |j                  |j.                         |j                  |       |j                  |j0                         o ||||fS )NrN  )r   r   rr   )r+   rZ  rT  r  rK  rk  rF  rp   rD  r1   ru   r   r   r   rX  rk  rY  r  r  r   r  r  r[  rn   rr   )	r   r  r  r  precompile_argsr  r  r  rv  s	            rJ   python_argdefszKernelArgs.python_argdefs  s    #%!	')	/1t33::<= 	H(J/OOGH$7$789X11"56QWW..x/C/CB/GHI""!,,#//3''++H,@,@,DE	 "$$&(;(;(A(A(C
 	LE5 ,,,
5*0MOOGEN+U#QWW..u56"" ''++E2	 !MM//1 	ALE5OOGEN+U#T%[)""75%#89ww##$$99%@	A && 	(COOGCNN34S^^,""3'SYY'		(
 OY>>rL   c              #    K   t        | j                  j                               D ]  }t        |t              r|j
                  D ]  }|t        j                  j                  v s|t        j                  j                  v r<|| j                  v r| j                  |   |j                  f || j                  v svt        t        | j                  |         |j                  f   y wrY   )r+   rZ  rT  r  rK  rD  r1   ru   r(  r'  rX  rp   rY  r	   rm   )r   r  others      rJ   aliaseszKernelArgs.aliases@  s     t33::<= 	UH(J/!-- 	UQWW777 ; ;;D...,,U3X5H5HHHD///sD$7$7$>?ATATTT	U	Us   B9C,<0C,c                    t        | j                  j                  |t              t              xr. t        | j
                  j                  |t              t              S rY   )r  rY  rR  rM  rK  rZ  r   s     rJ   
is_removedzKernelArgs.is_removedO  sK    ##D'2J
 N--11$@*M	NrL   c                l   t               }t        | j                  j                               D ]1  }t	        |t
              r|j                  |j                  d          3 | j                  j                         D ]5  \  }}|| j                  v st	        |t
              r%|j                  |       7 |S )NrN  )
r   r+   rZ  rT  r  rK  r  rD  rY  rk  )r   	live_outsr  r  r  s        rJ   live_output_bufferszKernelArgs.live_output_buffersW  s    L	t33::<= 	4H(J/MM(..r23	4 !//557 	!LE5,,,
5*0MMM% 	! rL   N)rw   rm   rS  z6Union[dict[_T, Union[str, RemovedArg]], dict[_T, str]]r   rA   r]   rm   r]   r  r   )r   r   r]   r^   r   )rl  rm   rm  rm   r]   r  )ru  rh   r[   r^   r]   ztuple[str, int])r}  rh   r]   rm   )r   rm   r  r   r]   rm   )r   r  r]   rm   )r]   zIterator[str])r   rm   r]   r   )rn  rm   rr   rq   r]   rm   )r   
SymbolLiker]   rm   )r]   z&tuple[list[str], list[str], list[str]])r]   z?tuple[list[ArgName], list[str], list[KernelArgType], list[Any]])r]   zIterator[tuple[str, str]]r  )r]   zOrderedSet[str])r_   r`   ra   rc   rV  rn  r_  ra  rf  r  rr  rx  r  r  r   r  r  r  r  r  r  r  r  r  rd   rL   rJ   rP  rP     s    		E	 	 
		 	5
 , ,
@B44'!R87


2'.R/?	H/?bUN
rL   rP  c                  Z     e Zd ZdZ	 d	 	 	 	 	 d	 fdZd
dZddZddZddZd
dZ	 xZ
S )r
  aD  A CSEVariable is just a name for an expression but it is useful to be able to annotate them on a backend dependent basis.
    To do so, the backends can simply overload `Kernel.create_cse_var`
    The "CSEVariable.update_on_args" method gives you a hook for annotations
    See example of TritonCSEVariable in triton.py
    c                ~    t         |           t        |t              sJ || _        || _        d| _        || _        y r  )r  rn  r  r   r   bounds	use_countrr   )r   r   r  rr   r  s       rJ   rn  zCSEVariable.__init__k  s<     	&+...	
rL   c                    | j                   S rY   r)  r   s    rJ   rN  zCSEVariable.__str__x  s    yyrL   c                ,    t        | j                        S rY   )hashr   r   s    rJ   __hash__zCSEVariable.__hash__{  s    DIIrL   c                X    t        |t              xr |j                  | j                  k(  S rY   )r  r
  r   )r   r  s     rJ   __eq__zCSEVariable.__eq__~  s!    %-I%**		2IIrL   c                     y rY   rd   )r   r   r^  r_  s       rJ   update_on_argszCSEVariable.update_on_args  s    rL   c                N    | j                   j                   d| j                  dS r	  )r  r_   r   r   s    rJ   r_  zCSEVariable.__repr__  s$    ..))*!DII=::rL   rY   )r   rm   r  ValueRanges[Any]rr   r  r   )r]   r   )r  objectr]   r^   )r   rm   r^  r   r_  r   r]   r  )r_   r`   ra   r   rn  rN  r  r  r  r_  r  r  s   @rJ   r
  r
  d  sH     (,	 ! %	J;rL   r
  AugmentedKeyT)defaultCSEVariableType)boundr  .c                  >   e Zd ZdZ	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZddZddZddZddZ	dd	Z
dd
ZddZ ej                         dddd	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ ej                         df	 	 	 	 	 ddZ ej                         df	 	 	 	 	 	 	 ddZy)CSEz Common subexpression eliminationNc                    || _         || _        i | _        || _        |xs i | _        |xs i | _        |xs t        j                         | _        t               | _
        |xs i | _        y rY   )rw   suffix_cachename_prefixstore_cachereduction_cache	itertoolsri   iter_buffer_idsr   invalidated_storesvarname_map)r   rw   r  r  iter_buffersr  r  r  s           rJ   rn  zCSE.__init__  sm     FH&ALARPR!r 	 6B5VY__EV3=<7B7HbrL   c                6   g | j                   j                         D ]2  \  }}||vs| j                   |= | j                  j                  |       4 |r9| j                  j                         D ci c]  \  }}||v s|| c}}| _        y i | _        y c c}}w rY   )r  rk  r  r  r  )r   	keep_varsr   tmprl  rm  s         rJ   
invalidatezCSE.invalidate  s    44++1134 	2ID#)#$$T*''++D1	2 ,0KK,=,=,?RDAq1	>1a4RDKDK Ss   1B>Bc           	          t        |       | j                  | j                  | j                  | j                  | j
                  | j                  | j                        S )N)rw   r  r  r  r  r  r  )r  rw   r  r  r  r  r  r  r   s    rJ   clonez	CSE.clone  sP    tDz;;;;((--(((( 00
 	
rL   c                    | j                         }t        | j                        |_        t        | j                        |_        t        | j                        |_        |S )zNReturn a copy of using ScopedDict so changes to *_cache aren't visible in self)r  r&   r  r  r  )r   new_cses     rJ   scoped_copyzCSE.scoped_copy  sH    **,#DKK0",T-A-A"B()9)9:rL   c                "    t        t        |      S )z@Override this method to augment cache key with backend specifics)r	   r  r   	cache_keys     rJ   augment_keyzCSE.augment_key  s    M9--rL   c                @    || j                   | j                  |      <   y rY   r  r  )r   r  ro  s      rJ   putzCSE.put  s    36D$$Y/0rL   c                <    | j                  |      | j                  v S rY   )r  r  r  s     rJ   containszCSE.contains  s    	*dkk99rL   c                X    | j                   j                  | j                  |      d       S rY   )r  rR  r  r  s     rJ   try_getzCSE.try_get  s"    {{t//	:DAArL   c                >    | j                   | j                  |         S rY   r  r  s     rJ   rR  zCSE.get  s    {{4++I677rL   T)r  write
assignmentrr   c          	        t        |t              r|j                  }|s|sJ t        |t              rE|j                  j                  |      |_        |xj                  dz  c_        t        t        |      S t        |t              r|j                         }n1t        |t              r|j                  }nt        |t              sJ |}| j                  |      }|s| j                  ||      }| j!                  ||       |rt"        j$                  j&                  r+t"        j$                  j&                  j)                  |d       t        |t              rP|r |j+                  | j,                   | d       |j/                  |       |j+                  | j0                         |S t        |t              rM|sJ |j+                  |j3                  | j,                   | d|j                   | j0                                |S |r | j,                   | d| | j0                   }	n| | j0                   }	|j+                  |	       |rKt4        j6                  j8                  r1|/d| j,                   | dt;        |       d}
|j+                  |
       |S |j                  j                  |      |_        |xj                  dz  c_        |S )	Nr>   T)	only_oncez =z = tl.static_assert(
.dtype == rQ   )r  r.   r  r
  r  tightenr  r	   r  r$   getvaluer"   r.  rm   r  newvarr  r1   r'  current_nodecodegen_originating_infor:  rw   splicer  r2  r   test_configsruntime_triton_dtype_assertr*   )r   r   r   r  r  r  rr   r  r;  r.  assert_lines              rJ   generatezCSE.generate  si    dH%::D
""dK( ++--f5DKNNaN..n-I./		IdC(((Ill9%++fe,CHHY$88((HH))BB$ C  dN3!((DKK=R)@AMM$'$$T[[12 
1  &67%%:$$$++se3tyyk$++'WX, 
% ""&++se3tfT[[MJ"&}5$$T* #"//KK!-(9$++se:VabgVhUiij&k((5 
 ++F3CJMMQM
rL   c                    | j                    t        | j                         }t        j                  j                  |||      }|| j                  |<   |S rY   )r  rt   r  r1   r'  create_cse_varr  )r   r  rr   var_namer;  s        rJ   r  z
CSE.newvar  sR    
 &&'T-A-A(B'CDhh%%h>%("
rL   c                    t        j                  | j                  vfd       t        j                  j                  ||      }|| j                  <   |S )Nc                     d  S )Nzduplicate name: rd   r)  s   rJ   r#  zCSE.namedvar.<locals>.<lambda>0  s    4DTF2K rL   )r   _check_valuer  r1   r'  r  )r   r   r  rr   r;  s    `   rJ   namedvarzCSE.namedvar)  sS     	(((*K	
 hh%%dFE:!$
rL   )rG  rG  r  NNNN)rw   rm   r  rm   r  rm   r  zOptional[itertools.count[int]]r  z.Optional[MutableMapping[str, CSEVariableType]]r  z<Optional[MutableMapping[ReductionCacheKey, CSEVariableType]]r  z$Optional[dict[str, CSEVariableType]])r  zOrderedSet[CSEVariable]r]   r  r]   ztyping.Self)r  rm   r]   r  )r  rm   ro  r  r]   r  )r  rm   r]   r^   )r  rm   r]   zOptional[CSEVariableType])r  rm   r]   r  )r   r$   r   zCUnion[str, CSEVariable, OpsValue, IndentedBuffer, DeferredLineBase]r  r  r  r^   r  r^   rr   r  r]   r  )r  r  rr   r  r]   r  )r   rm   r  r  rr   r  r]   r  )r_   r`   ra   r   rn  r  r  r  r  r  r  r  rR  r   unknownr  r  r  rd   rL   rJ   r  r    sw   *  7;FJ <@II I 	I
 5I DI
I :I0	
.7:B8 $7;#6#6#8'+BB RB
 !B B B %B 
BL $7;#6#6#8'+  % 
	 $7;#6#6#8'+	 ! %	
 
rL   r  c                  0     e Zd Zd fdZddZddZ xZS )CodeGenc                T    t         |           t        j                         | _        y rY   )r  rn  r?  	ExitStack
exit_stackr   r  s    rJ   rn  zCodeGen.__init__8  s    $..0rL   c                :    | j                   j                          | S rY   )r   	__enter__r   s    rJ   r  zCodeGen.__enter__<  s    !!#rL   c                >    | j                   j                  |||       y rY   )r   __exit__)r   exc_typeexc_valexc_tbs       rJ   r  zCodeGen.__exit__@  s      7F;rL   r  r  r  r   r  r   r  r   r]   r  )r_   r`   ra   rn  r  r  r  r  s   @rJ   r  r  7  s    1<rL   r  c                      e Zd ZU dZded<   dZded<   dZded<   	 d 	 	 	 	 	 d! fdZej                  d"d	       Z
ej                  	 	 d#	 	 	 	 	 	 	 d$d
       Zd%dZd%dZd&dZ	 d'	 	 	 	 	 	 	 	 	 d(dZ	 	 	 	 	 	 	 	 	 	 d)dZ	 	 	 	 	 	 	 	 d*dZ	 	 	 	 	 	 	 	 	 	 d+dZd,dZ	 	 d#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d-dZed.d       Z	 d'	 	 	 	 	 	 	 	 	 d/dZ	 	 	 	 	 	 	 	 	 	 d0dZd1dZd2 fdZd3 fdZd4dZd5dZd5dZ	 	 	 	 d6dZd7dZ d8dZ! xZ"S )9KernelrG  rm   newvar_prefixr  Nz'Optional[Callable[[], OpsHandler[Any]]]	overridesc                P   t         |           |rt        xj                  dz  c_        |xs
 t	               | _        t               | _        t               | _        t               | _	        d| _
        d| _        t        | j                  | j                        | _        t!        t"                  | _        t!        t"                  | _        d | _        d | _        d | _        d | _        t!        t"                  | _        t!        t"                  | _        i | _        d| _        d | _        y )Nr>   r   )r  rn  r   generated_kernel_countrP  r^  r$   loadscomputestoresnum_loadnum_reductionr  r  r  cser   rm   must_keep_buffersstore_buffer_names
_load_mask_load_otherr  node_to_boundsr&  r(  inplace_update_buffersmin_elem_per_threadkernel_name)r   r^  increase_kernel_countr  s      rJ   rn  zKernel.__init__I  s     	 **a/*(JL	#%
%'$&.1$2D2Ddkk.R!+C!2",S/"3)-4859OS)#0",S/"3
 79##$ *.rL   c              #     K   | j                   }|| _         |j                  j                         j                         | _        	 d  || _         y # || _         w xY wwrY   )r  r  r  
get_boundsr  )r   r  priors      rJ   set_current_nodezKernel.set_current_nodek  sO     !! "jj//1<<>	& %DDs   AAA A	AAc              #    K   ||}|d u x}r
t               }| j                  }| j                  }| j                  }| j                  }|| _        || _        || _        |j                         | _        	 d  || _        || _        || _        || _        |r
|rJ d       y y # || _        || _        || _        || _        |r
|rJ d       w w xY ww)Nz$unexpected store inside swap_buffers)r$   r  r  r  r  r  )	r   lbcbsbdisallow_storesr  r  r  r  s	            rJ   swap_bufferszKernel.swap_buffersu  s      :B Dj(?(!B

,,hh
??$	FDJ"DL DKDHEEEv2 	 DJ"DL DKDHEEEv2 s   A/C2B 6)C*C		Cc                    t         rY   r   rF  s      rJ   rV  zKernel.load  r   rL   c                    | j                   }	 | j                  | _         | j                  ||      || _         S # || _         w xY w)z+A load the depends on an index we have read)r  r  rV  )r   r   r=  r!  s       rJ   indirect_loadzKernel.indirect_load  s8    

	DJ99T5)DJDJs	   "8 	Ac                    t         rY   r   rK  s       rJ   rX  zKernel.store_reduction  r   rL   c                    t         rY   r   rH  s        rJ   rW  zKernel.store  
     "!rL   c                    t         rY   r   rM  s        rJ   rT  zKernel.reduction  
     "!rL   c                    t         rY   r   rQ  s       rJ   rU  zKernel.scan  s
     "!rL   c                    t         rY   r   rW  s        rJ   rZ  zKernel.sort  r0  rL   c                    t         rY   r   r   s    rJ   
var_rangeszKernel.var_ranges  r   rL   c                    t         )z3
        See [Note: Inductor bucketize op]
        r   r\  s           rJ   rc  zKernel.bucketize  s
     "!rL   c                    t         rY   r   r   s    rJ   assert_functionzKernel.assert_function  r   rL   c           	     Z   t        |t              rt        |      }t        |t              sJ |t        |t              sJ |t        |t              sJ |r|rd| d| d| d| d	}| d| d| }n|r
| d| }|}n|sJ | d| }|}|r	d| d| d}| j                   d| d| dS )	Nr   z <= z) & (z < rQ   z) | ~(z, "index out of bounds: z"))r  r
  rm   r7  )r   r;  rB  rC  maskr  
cond_prints          rJ   indirect_assertzKernel.indirect_assert  s     c;'c(C#s###}
5# 666}
5# 666U ugT#eC5E7!<D!7$se3ug6JWD&DJL5U#eW%DJtfF4&*D&&'q.FzlRTUUrL   c                    t         rY   r   rA  s        rJ   rD  zKernel.check_bounds  r.  rL   c                    t         rY   r   rp  s     rJ   index_to_strzKernel.index_to_str  r   rL   c           	     (   t         |           | j                  sJ | j                  j	                  t        j                  t        | | j                                            | j                  j	                  t        j                  |              | S rY   )	r  r  r  r   enter_contextr1   set_ops_handlerCSEProxyset_kernel_handlerr  s    rJ   r  zKernel.__enter__  sl    ~~~%%htT^^-=>?	
 	%%a&:&:4&@ArL   c                H    | j                          t        | 	  |||       y rY   )remove_kernel_local_buffersr  r  )r   r  r  r  r  s       rJ   r  zKernel.__exit__  s     ((*7F3rL   c                   t         j                  j                  syt        fd| j                  D              }t        t
                  | j                  D ]N  }|| j                  vs|| j                  j                  vs+j                  ||      s>j                  |       P D ]  }|| j                  j                  v rw| j                  j                  |   }t        |t              rEt        fd|j                  D              }|r| j!                  |       | j"                  j                  |       | j%                  |        y)z
        Any buffers that are both created and have a last use in the
        same kernel can be removed.

        Note that V.graph.scheduler can be None when codegening triton template
        kernels.
        Nc              3  t   K   | ]/  }|j                   v rj                   |   j                          1 y wrY   )name_to_bufdefining_op_name)ru  rn  r  s     rJ   rw  z5Kernel.remove_kernel_local_buffers.<locals>.<genexpr>  s;      &
i+++ !!#&779&
s   58c              3  &   K   | ]  }|v  
 y wrY   rd   )ru  rv  names_to_removes     rJ   rw  z5Kernel.remove_kernel_local_buffers.<locals>.<genexpr>,  s     KaQ/1Kr$  )r1   ru   r  r   r  rm   r  r^  rX  $can_buffer_be_removed_through_fusionr  rZ  r  rK  r}  rD  remove_inplace_bufferr(  remove_buffer)r   fused_node_namesr   rn  removerK  r  s        @@rJ   rE  z"Kernel.remove_kernel_local_buffers  s*    GG%%	% &
..&
 

 %S/+++ 	*DD222		 7 77BB*  ##D)	* $ 
	)Dtyy000ii//5c:.K3??KK..t4''++D1""4(
	)rL   c                    t         j                  d|       t        | j                  j                  |<   | j
                  j                  |       y )Nzremove_buffer(%r))r  rH   rM  r^  rY  r&  r  r   s     rJ   rN  zKernel.remove_buffer3  s;     			%t,)0		  &  &rL   c                    t         j                  d|       t        | j                  j                  |<   | j
                  j                  |       y )Nzremoving_inplace_buffer(%r))r  rH   rM  r^  rZ  r&  r  r   s     rJ   rM  zKernel.remove_inplace_buffer;  s9    		/6*1		!!$'  &rL   c           	        t        |t        t        f      r|D cg c]  }| j                  |       c}S t        j
                  j                  j                  |      }t        |j                  d       }|D ci c]W  }t        |t        j                  t        j                  t        j                  f      r|| j                  j!                  |      Y }}t#        ||      S c c}w c c}w )Nc                    | j                   S rY   r)  )ss    rJ   r#  z(Kernel.rename_indexing.<locals>.<lambda>H  s
    !&& rL   )rs  )r  listtuplerename_indexingr1   ru   r  r  sortedfree_symbolsr   r   UNBACKED_INTSIZEPRECOMPUTED_SIZEr^  r   r)   )r   r=  r  sorted_symbolsreplacementss        rJ   rX  zKernel.rename_indexing@  s    
 edE]+5:;D((+;;  ))%0 2 28HI $
%%II)) tyy~~a  
 
 %.. <
s   C%;AC*c                    t        |i |S rY   )r
  )r   r^  r_  s      rJ   r  zKernel.create_cse_varW  s    D+F++rL   c                Z    |y| j                   j                  |j                               S )zC
        Returns arg name of a given input or output node.
        N)r^  r  r   )r   r  s     rJ   r  zKernel.arg_nameZ  s'     <yy!!$--/22rL   )NT)r^  zOptional[KernelArgs]r  r^   r]   r  )r  r=   r]   r>  r  )r$  r$   r%  Optional[IndentedBuffer]r&  rb  r]   r>  r   rm   r=  rh   r]   r
  r   rm   r=  rh   r  r
  r]   r  rY   
r   rm   r=  rh   r  r
  rI  r0   r]   r  
rr   rq   rN  rq   rO  r/   r  +Union[CSEVariable, tuple[CSEVariable, ...]]r]   rg  rR  r  rS  zUCallable[[tuple[CSEVariable, ...], tuple[CSEVariable, ...]], tuple[CSEVariable, ...]]rT  tuple[CSEVariable, ...]r]   ri  
rR  r  rT  ri  rX  r^   rY  r^   r]   ri  )r]   zdict[sympy.Symbol, sympy.Expr]rT  r
  r]  r  r^  r
  r_  rq   r`  r^   ra  r  rb  zOptional[CSEVariable]r]   r
  r   )
r;  zUnion[CSEVariable, str]rB  r   rC  r   r9  z!Optional[Union[CSEVariable, str]]r]   rm   r  )r=  rh   r]   rm   r  r	  r  r   rm   r]   r  )r=  z;Union[list[sympy.Expr], tuple[sympy.Expr, ...], sympy.Expr]r]   rh   )r^  r   r_  r   r]   r
  )r  r8   r]   r   )#r_   r`   ra   r  r   r  r  rn  r?  r@  r"  r(  rV  r+  rX  rW  rT  rU  rZ  r4  rc  r   r7  r;  rD  r>  r  r  rE  rN  rM  rX  r  r  r  r  s   @rJ   r  r  D  s   M3FC9=I6= PT /( /HL /	 /D & &  (,'+	FF %F %	F
 
F F8"" SW"" *"3>"FO"	"
"" " &	"
 ;" 
5""'"
" (" 
!""'" (" 	"
 " 
!"" 4804"" C" &	"
 $" " 1" ." 
" " " 37V$V V 	V
 0V 
V<""&0"9="FJ"	"
"4%)N''
/P/	/.,3rL   r  c                  8    e Zd ZU dZded<   dZded<   dZded	<   y)
rr  r  zClassVar[str]rs  Nr  rr   rG  rm   ops_name)r_   r`   ra   rs  r   rr   rn  rd   rL   rJ   rr  rr  c  s!    "C"#'E 'HcrL   rr  c                 b    	 dd l } | j                  | j                        S # t        $ r Y y w xY w)Nr   )	undefined)jinja2EnvironmentStrictUndefinedImportError)rq  s    rJ   
jinja2_envru  k  s?    !!,, " 
 	
  s   " 	..c                      e Zd ZdZe	 d		 	 	 	 	 	 	 d
d       Zedd       Ze	 	 	 	 dd       ZddZ	 	 	 	 	 	 ddZ	ddZ
y)KernelTemplatezg
    Base class for defining kernel templates.

    Children classes: TritonTemplate, CUDATemplate
    c                    | j                  d      }t        |      dkD  r|dd  D cg c]  }d|z  |z  |z    c}|dd  dj                  |      S c c}w )NTr>   r  rG  )
splitlinesr|  r}   )sourcenum_indentsindents_spacinglinesr.  s        rJ   indent_except_firstz"KernelTemplate.indent_except_first~  sd     !!$'u:>INqrAE&4<E!"I wwu~s   Ac                    t               }|y t        j                  |j                  d<   ddlm} 	 |j                  |       S # |$ r} G d d|      } ||      |d }~ww xY w)Nr~  r   )TemplateSyntaxErrorc                  (     e Zd Zd fdZddZ xZS )IKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxErrorc                    t         |   |j                  |j                  |j                  |j
                         || _        y rY   )r  rn  messagelinenor   filenameoriginal_error)r   r  r  s     rJ   rn  zRKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__init__  s>    G$&..&--&++&//	 +9D'rL   c                F   d| j                    d}|d| j                   dz  }t        | j                  d      r| j                  j                  j                  d      }|dz  }t        d| j                   dz
        }t        t        |      | j                   dz         }t        ||      D ]s  }|| j                   dz
  k(  rN||dz    d	||    dz  }t        | j                  d
      s=|dd| j                  j                  dz
  z  z   dz   z  }c||dz    d||    dz  }u |S )NzError in template at line 
zError message: rz  z	Context:
r   r   r>   z: --> columnz     r  z^
z:     )r  r  r  r  rz  splitmaxminr|  r9  r  )r   
error_infor}  startendr  s         rJ   rN  zQKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__str__  sA   #=dkk]"!MJODLL>"DDJt22H= $ 3 3 : : @ @ F"l2
 #At{{Q 7!#e*dkkAo>!&uc!2 
KA DKK!O3 *QveAhZr.J J
#*4+>+>#I$.(/*-1D1D1K1Ka1O*P)Q*/)0%&J !+QveAhZr.J J

K &%rL   )r  r  r]   r  r   )r_   r`   ra   rn  rN  r  r  s   @rJ   DetailedTemplateSyntaxErrorr    s    9&rL   r  )ru  rw  r~  filtersrq  r  from_string)rz  envr  er  s        rJ   _template_from_stringz$KernelTemplate._template_from_string  sj    l;-;-O-O)*.#	8??6**" !	8&.A &> .a0a7C!	8s   A A!AA!c                   t         j                  j                  t        | t        t
        f      r.| D ci c]!  }|j                         |j                         # c}n | j                         | j                         idfd}|S c c}w )Nc                >    j                  |       }||S  |       S rY   )rR  )r   rT  _get_dtype_reallookups     rJ   r   z1KernelTemplate._fake_get_dtype.<locals>.get_dtype  s'    ZZ%F!"4((rL   )r   rm   r]   rq   )r1   ru   r   r  rV  rW  r   )	fake_outsrn  r   r  r  s      @@rJ   _fake_get_dtypezKernelTemplate._fake_get_dtype  sr     ''++i$/AJK#cllncmmo5KF((*I,?,?,ABF	)  Ls   &B
c                    || _         y rY   r)  r   s     rJ   rn  zKernelTemplate.__init__  s	    	rL   c                   	 |j                   | j                  di |       y# t        $ rQ}t        j	                  d|t        |       t        j                         t        j                  k         |cY d}~S d}~ww xY w)a%  
        Maybe generates a new ChoiceCaller and appends it into existing choices.
        Returns None if success, otherwise returns the error.

        choices: A list of ChoiceCallers.
        kwargs: Additional kwargs to be passed to self.generate() to generate a new ChoiceCaller.
        Nz3Cannot Append Choice: %s. KernelTemplate type is %s)
stack_inford   )	rk  r  rS   r  infor  getEffectiveLevelrF   INFO)r   choicesr_  r  s       rJ   maybe_append_choicez"KernelTemplate.maybe_append_choice  sn    
	NN=4==2623" 	HHET
002W\\A	   H	s   !$ 	A>AA93A>9A>c                    t         )zM
        Generates a ChoiceCaller instance from the given arguments.
        r   )r   r_  s     rJ   r  zKernelTemplate.generate  s
    
 "!rL   N)   )rz  rm   r{  r   r|  r   r]   rm   )rz  rm   r]   r   )r  zUnion[list[Buffer], Buffer]r]   zCallable[[str], torch.dtype]rl  )r  z	list[Any]r_  r   r]   zOptional[NotImplementedError])r_  r   r]   r6   )r_   r`   ra   r   rc   r~  r  r  rn  r  r  rd   rL   rJ   rw  rw  w  s     >?"%8;	  *8 *8X .	% " ,/	&."rL   rw  c                  "    e Zd Zd Zd fdZddZddZ	 	 d	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 ddZddZ	ddZ
	 d	 	 	 	 	 	 	 	 	 ddZdd	Z	 	 	 	 	 	 	 	 	 	 dd
Z	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 ddZ	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ xZS )rB  c                b    t         |           ddlm}  |       | _        || _        || _        y )Nr   ValueRangeAnalysis)r  rn  r  r  vr_analysisr'  parent_handler)r   r'  r  r  r  s       rJ   rn  zCSEProxy.__init__  s+    /-/,rL   c                     | j                   gi  t        | j                        i t               ddfd}t	        j
                  |      S )Nr   c                   t         j                  j                  [t         j                  j                         j                  }|dk(  rt
        j                  dk(  n|dk7  rt
        j                  dk(  nd}nd}|r%
dk(  rj                  }n t        
      i 	}nd }t         j                  j                  j                  t         j                  j                  | |      }t
        j                  j                  rY|rWddlm} t%        |t&        t(        f      r|   }t         j                  j                  j+                  d	| d
 ||       d       dz  |j-                  
	       |S )Nr  r  r)  Fmasked)r  rr   r   )r*   r  r  rQ   r>   )r1   ru   r~  rt  r  r   r!  r&  rr   rw  r'  r  r  r  r  r  torch._inductor.codegen.tritonr*   r  rV  rW  r:  r  )rm  
device_strtriton_backendr  csevarr*   r^  r  dtype_handlerr_  r   
output_idxr  s         rJ   do_csez!CSEProxy._default.<locals>.do_cse  si   ww%%1WW@@BGG
 "U* &&(2 "U*  ,,8  "' 8##(;;L$7%$ $'  &$'L  $XX\\**  "	 + F "">>>F lT5M:#/
#;L  **'xz+l:S9TTUV !OJ!!$f5MrL   )rm  rm   r]   r
  )_bound_variablerw  r  r   pytreetree_map)	r   r   r^  r_  r  r  r  r  r  s	    ``` @@@@rJ   _defaultzCSEProxy._default  sh    %%%d<T<V<2++T2DCFC24
1	 1	f vu--rL   c                   ddl m} ddlm} t	        t
        j                  |      rt        j                         S t
        j                  j                  j                  |k(  rt| j                  j                  ^t	        | j                  j                  t              sJ | j                  j                  j                  t        j                               S t        j                   rjt#        ||      r^t%        fddD              rt        j                         S |rJ dd}t'        t)        ||            } t+        | j,                  |      | S t        j                         S )z
        If the variable comes from an FX node, we forward the bound we have already computed
        Else, if the variable when codegen'ing another op, we try to compute its bounds
        r   r  )TritonTemplateKernelc              3  :   K   | ]  }|j                   v   y wrY   )r  )ru  rU  fx_nodes     rJ   rw  z+CSEProxy._bound_variable.<locals>.<genexpr>D	  s     V11&Vs   )set_indirectrT  rU  c                    t        | t              r| j                  S t        | t        j                        rt        |       S | S rY   )r  r
  r  r   r  r   r  s    rJ   arg_to_boundz.CSEProxy._bound_variable.<locals>.arg_to_boundM	  s2    a-88O5::.&q>)HrL   )r  r   r]   r   )r  r  select_algorithmr  r  r1   r'  r   r  interpreterr  r  r  dictrR  r   compute_all_boundsr  r%  rV  r^  rw  r  )	r   r   r^  r_  r  r  r  
arg_boundsr  s	           @rJ   r  zCSEProxy._bound_variable2	  s   
 	0;ahh 45&&((--,,>>T!dkk&@&@&Ldkk88$???;;--11';;N;N;PQQ&&73Et+L V0UVV"**,, : c,56J274++T2J??""$$rL   c                   t        |t              rt        j                  |      }t        |t        j                        sJ |       |j
                  j                  dk  r|rt        j                  |t        j                  |t        j                              }|j
                  j                  dk\  r0t        j                  |d      }t        j                  |||      }n|}t        j                          }|j
                  t        j                          k7  rt        |t        j"                        r|j
                  t        t$         d      z  }t        |j                  |z   |j                  |z         }|j
                  j                  dk\  r"|j
                  t        dt$              z  }	||	z  }| j&                  j(                  j+                  | j&                  j,                  ||      }| j.                  j1                  |||      }
t3        |      ro|j
                  j                  dk\   }t        |t        j"                         xs |j
                  j                  |k   }| j&                  j5                  |
|||       |
S )Nr   rN  )r  )r  r   r   r9  r  r  rB  r,   r  rM  r   longrC  ltr  r   r  Numberr   r'  r  r  r  r  r>  r#   rD  )r   r;  r   r<  r=  stmr  
new_bounds
neg_boundspos	sympy_varassert_lowerassert_uppers                rJ   r>  zCSEProxy.indirect_indexingY	  s    dC ==&D$

+1T1+ ::aggc3>>$

#CD::##q(QB))BS1C %,,.Jzz[0022z$7U !ZZ+vgr*BB
($$t+Z-=-=-D
 ::##q(**{1f'==C!+c!1J++//**4;;+>+>J*WC''99#tUK	5! #

 0 0A 56L)$== 

  4'BL KK$$YlLQrL   c                >    | j                   j                  ||||      S rY   )r'  rD  rA  s        rJ   rD  zCSEProxy.check_bounds	  s     {{''dE5AArL   c                   || j                   j                  j                  v r)t        j                   j                  j                  |       t        |t        j                        r| j                   j                  ||      S | j                   j                  j                  }||v r||   S | j                   j                  ||      }|j                  dk(  r| j                   xj                  dz  c_        |S r  )r'  r  r  r1   r  r  r   r   TMPr+  r  rV  r  r  )r   r   r=  r  outs        rJ   rV  zCSEProxy.load	  s    4;;??555 HH&&**40udhh/;;,,T599kkoo11;t$$kktU+ ==AKK  A% 
rL   c                l   || j                   j                  j                  |<   | j                   j                  r{|t        j
                  j                  v r^| j                   j                  j                  |      }|j                         D ]%  }|| j                   j                  j                  |<   ' y y y rY   )	r'  r  r  r  r1   ru   name_to_buffer
get_outputget_mutations)r   r   r  rn  
other_names        rJ   _update_store_cachezCSEProxy._update_store_cache	  s    ,1##D);;##0F0F(F++**55d;C!//1 @
:?++J7@ )G#rL   c                    | j                   j                  j                  |       || j                  ||       |t        j
                  j                  vr| j                   j                  ||||      S y )N)rI  )r'  r  r  r  r1   ru   r&  rW  rH  s        rJ   rW  zCSEProxy.store	  sd     	&&**40<$$T51qww...;;$$T5%d$CCrL   c                    | j                   j                  j                  |       | j                  ||       |t        j
                  j                  vr| j                   j                  |||      S y rY   )r'  r  r  r  r1   ru   r&  rX  rK  s       rJ   rX  zCSEProxy.store_reduction	  sZ    &&**40  u-qww...;;..tUEBB /rL   c                |    | j                   xj                  dz  c_        | j                   j                  ||||      S r  )r'  r  rT  rM  s        rJ   rT  zCSEProxy.reduction	  s4     	!!Q&!{{$$UI~uMMrL   c                <    | j                   j                  |||      S rY   )r'  rU  rQ  s       rJ   rU  zCSEProxy.scan	  s     {{
F;;rL   c                >    | j                   j                  ||||      S rY   )r'  rZ  rW  s        rJ   rZ  zCSEProxy.sort	  s     {{
CCrL   c           	     D    | j                   j                  |||||||      S )a  
        [Note: Inductor bucketize op]

        Inputs:
        -------
        values: the values to be bucketized.
        boundaries: a tuple containing
          (a) the name of the boundaries tensor (which must be sorted, unless
          the sorting tensor is present),
          (b) the length of the tensor in the last dimension (i.e. the length of
          one set of boundaries),
          (c) the number of elements in the underlying storage (i.e. the length
          of the flattened tensor, ignoring striding), and
          (d) the stride of the tensor in the last dimension.
        boundary_indices: indices into a flattened version of the boundaries
        tensor, of the same size and shape as "values".  Each index points to
        the first element in the set of boundaries to be used for the
        corresponding value.
        indexing_dtype: the dtype to use when indexing into the boundaries
        tensor.  This must be int64 or int32.  This additionally specifies the
        dtype of the return value.
        right: see "Details" below.
        sorter: an optional tuple containing
          (a) the name of an optional sorting tensor, used to access unsorted
          boundaries without reordering the boundaries tensor, and
          (b) the stride of the tensor in the last dimension.
        The values in the sorting tensor are used as indices into the *last*
        dimension of the boundaries tensor, with all other indices matching.
        The size of the sorting and boundaries tensors must be equivalent.
        sorter_indices: must be present if the sorting array is present; see
        "boundary_indices" for the equivalent definition for the boundaries
        tensor.

        Output:
        -------
        The buckets each value belongs in, within a given set of boundaries.  0
        indicates a position before the first boundary, and len(boundaries_set)
        represents a position after the last boundary.

        Details:
        --------
        Given a value and a set of boundaries, calculate the bucket that each
        value belongs to.  This works differently in 1-D and N-D cases.

        for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [0, 4, 4, 8], right=True
        return =   [[ 0, 1, 1, 1], [1, 3, 3, 4]].

        for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [[0, 4], [4, 8]], right=True
        return =   [[ 0, 1, 1, 1], [0, 1, 1, 2]]

        Note that in the N-D boundaries case, the shape of "values" and
        "boundaries" must match in every dimension _except_ the last.

        When right == False, bucket i refers to range (boundaries[i], boundaries[i+1]].
        When right == True,  bucket i refers to range [boundaries[i], boundaries[i+1]).

        Boundaries must be non-decreasing, or a sorter must be provided which
        would re-index offsets in a non-decreasing order (e.g. the second output
        of torch.sort(offsets)).  Otherwise, the result is undefined.
        )r'  rc  r\  s           rJ   rc  zCSEProxy.bucketize	  s1    L {{$$
 	
rL   )r'  zKernel[Any]r  zOpsHandler[Any])r   rm   r^  ztuple[Any, ...]r_  zdict[str, Any]r]   r   )r   rm   r^  r   r_  r   r]   r  r  )
r;  r
  r   r  r<  r^   r=  r^   r]   r  r  rc  )r   rm   r  r
  r]   r  rY   re  rd  rf  rh  rj  r  rk  )r_   r`   ra   r   rn  r  r  r>  rD  rV  r  rW  rX  rT  rU  rZ  rc  r  r  s   @rJ   rB  rB    s   D-;.z%%V // %/ 	/
 / 
/bBB&0B9=BFJB	B
"@ SW *3>FO	CNN N &	N
 ;N 
5N	<'	<
	< (	< 
!	<D'D (D 	D
 D 
!D  4804N
N
 CN
 &	N

 $N
 N
 1N
 .N
 
N
rL   rB  )rI   rm   r]   r  rY   )
rl   rm   r   r   r   r   r   r   r]   r  )rl   Union[torch.device, str, None]r]   zOrderedSet[BackendFeature])rl   r  r  r   r]   r^   )rl   rm   r]   zOptional[SchedulingConstructor])F)rl   rm   r  r^   r]   r   r  )r=  Sequence[sympy.Expr]r>  r  r?  r  r]   r   )rl   rm   rB  r   r]   r  )rl   rm   r]   r   )r]  rm   r^  r   r_  r   r]   r  )r  rm   r]   r^   rd   r  )r]   r   )
__future__r   r?  dataclassesenumr~  r  rF   r  r  retypingr   r   r   r   r   r	   r
   r   r   r   r   r   typing_extensionsr   r   r   torch.fxtorch._prims_commonr   torch.utilsr   r  torch.utils._ordered_setr   torch.utils._sympy.numbersr   torch.utils._sympy.printersr   _PythonPrintertorch.utils._sympy.symbolr   r   r   torch.utils._sympy.value_rangesr   r   rG  r   r   dtype_propagationr   ops_handlerr   r    utilsr!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   virtualizedr,   r-   r.   r/   r0   r1   collections.abcr2   r3   r4   r   r5   r6   r7   r8   r  r:   r  r;   r<   r=   r2  r@   rA   r   r  r   rm   r  r  r  _logginggetArtifactLoggerr_   rD   	getLoggerr  rK   rN   rg   	dataclassr   r   r   r   r   KernelArgTyper   r   r   r   r   r   r  r  r  r  	lru_cacher  r@  rC  rI  bfloat16r[  float16r^   r  float64int8int16r  r\  r   uint16r|  uint64rJ  ra  rc  r  compile
IGNORECASEr  r  r  r  r  INT_TO_FLOATr}  r*  r,  r4  rC  rF  rK  rM  rP  r
  r  r  rW  rr   ReductionCacheKeyr  r  r  rr  ru  rw  rB  r   s   0rJ   <module>r	     s8   "         	   
 
 
 &    ? ) / - G O O D  : ;    Q P BB>>$DD-	B$hy&9%:N%JK23sELL()J F~~//*Eg!=
/		 /( T[ [ [| # # #          = = = lIw8H,VW,.) .3" 3"l :< 6 ;8 @D	, / !=	
 

&T 
&3*33$3*35C3	3U
 &+

"
!
 TJ JZUU$U  U 	U;;&7;	;, 
NNEKK	MM5;;> JJMMMMJJKKKKKKKKLLLLLL
 	u> : ,''' ' 	'T_B _BD%N %S1 S1l "rzz";2==Q a;#%5z# a;H - - -  6: _6;HH-_6 ;HH/- 	_6 ;HH/- 	_6$ ;HH/- 	%_60 ;HH/- 	1_6< ;HH*)	=_6L ;HH(0	M_6X 	;HH66>	Y_6h ;HH1i_6r ;HH2s_6| ;HH1}_6F ;HH2 G_6P ;HH%8$Q_6^ 	;HH&%		__6j ;HH%8	k_6v 	;HH&	w_6@ ;HH+A_6L %;HH88)	M_6X %;HH88)	Y_6d %;HH8)e_6n %;HH8)o_6z 
;HH'
{_6D ;HH(E_6N ;HHSO_6\ ,;HH?0]_6f ,;HH?0g_6r &;HH9*s_6| 
;HH*
}_6F );HHD-G_6P );HHD-Q_6Z );HHD-[_6d );HHD-e_6n (;HHC,o_6x $1;HHL5$y_6B $1;HHL5$C_6L $1;HHL5$M_6V $1;HHL5$W_6` ';HHB+a_6j (;HHC,k_6t (;HHC,u_6 2 _D	-# -"> *Z 
 N N N 
 ,A AH
!; !;H 5+;Tk5c!1223	5a'/=0
1 aH
< 
<\3Wgo. \3~    T p" p"fz
~ z
w7s   
c