
    nVhL              	         d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ ddlmZ d dlmZ dd	lmZmZ e d e!d
        Z" ed      Z# G d dejH                        Z%dGdZ& G d d      Z'i Z(g Z)d Z*dHdZ+ G d dee#         Z,d Z-d Z.i ddddddddddd d!d"d!d#dd$d%d&d%d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:Z/ e0e/jc                               D ]  Z2e2e/e2<   	  G d; d<e,e#         Z3edId=       Z4edddddddd>	 	 	 	 	 	 	 	 	 	 	 	 	 dJd?       Z4	 dKdddddddd>	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dLd@Z4 G dA dB      Z5 G dC dD      Z6dE Z7dF Z8y)M    )annotationsdivisionN)defaultdict)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTuple   )driver)
ModuleType)find_paths_ifget_iterable_pathz.runtime.jitTc                  v     e Zd ZdZd fdZed        Zd Zd Zd Z	d Z
d Zd	 Zd
 Zd Zd Zd Zd Z xZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    c                    t         |           || _        t        j                  |j                  d            | _        || _        h d| _        i | _	        d| _
        y )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstanceF)super__init__namehashlibsha256encodehasherglobalssupported_python_builtinsused_global_valsvisiting_arg_default_value)selfr'   r,   src	__class__s       B/home/dcms/DCMS/lib/python3.12/site-packages/triton/runtime/jit.pyr&   zDependenciesFinder.__init__%   sU    	nnSZZ%89 *
&. TV*/'    c                6    | j                   j                         S N)r+   	hexdigestr0   s    r3   retzDependenciesFinder.retI   s    {{$$&&r4   c                    t        j                  |j                        ryt        |dd      }|j	                  t
              S )NT
__module__ )inspect	isbuiltinfuncr#   
startswithTRITON_MODULE)r0   noder?   modules       r3   _is_triton_builtinz%DependenciesFinder._is_triton_builtinM   s6    TYY'|R0  //r4   c                >   t        |t              r| j                  j                         |j                  j                         z  D ]_  }|\  }}| j                  |   \  }}|j                  |   \  }}||k7  s2t	        d| d| d| j
                   d|j                   d| d       | j                  j                  |j                         |j                  }|t        t        |dd            z  }| j                  j                  |j                  d	             y y )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr   )r$   JITFunctionr.   keysRuntimeErrorr'   __name__update	cache_keystrr#   r+   r*   )r0   r?   kvar_name_v1v2func_keys           r3   _update_hashzDependenciesFinder._update_hashS   s3   dK( **//1D4I4I4N4N4PP !--a0A--a0A8&*8*KtCSTXT]T]S^^qrvrr  rA  AX  Y[  X\  \S  T  !!(()>)>?~~HGD*e<==HKKxw78 )r4   c                $   t        |j                        t        j                  u r|j                  S |j                  | j
                  v ry | j                  j                  |j                  d       }|| j                  st        |      t        urot        |t              s_t        |dd      sR|j                  | j                  vr:|| j                  f| j                  |j                  t	        | j                        f<   | j                  |       |S )N__triton_builtin__F)typectxastStoreidlocal_namesr,   getr/   r   r$   rH   r#   r-   r.   rU   )r0   rB   vals      r3   
visit_NamezDependenciesFinder.visit_Namee   s    >SYY&77N77d&&&lltww-
 O 77 IZ/ #34WSJ^`e=fGG4#A#AABEt||ATD!!477Bt||,<"=>#
r4   c                ^    |j                   D cg c]  }| j                  |       c}S c c}w r6   )eltsvisit)r0   rB   elts      r3   visit_TuplezDependenciesFinder.visit_Tuple   s$     ,0995C

3555s   *c                X   | j                  |j                        }t        |t        j                        r6| j                  |j                        }t        |t        j                        r6|t        |dd      t        k(  ry t        ||j                        }| j                  |       |S )NrK   r<   )	rc   valuer$   rZ   	Attributer#   rA   attrrU   )r0   rB   lhsr9   s       r3   visit_Attributez"DependenciesFinder.visit_Attribute   s    jj$cmm,**SYY'C cmm,;73
B7=Hc499%#
r4   c                    |j                   j                   D ch c]  }|j                   c}| _        | j                  |       y c c}w r6   )argsargr]   generic_visit)r0   rB   rn   s      r3   visit_FunctionDefz$DependenciesFinder.visit_FunctionDef   s4    /3yy~~>CGG>4  ?s   Ac                p     fd}t        j                  |j                  |j                  |j                  r|j                  gng |j
                        D ]  } j                  |         ||j                         |j                   j                  |j                          ||j                         y )Nc                    	 j                   rJ d_         | D ]  }|j                  |        	 d_         y # d_         w xY w)NTF)r/   rc   )defaultsexprr0   s     r3   visit_defaultsz:DependenciesFinder.visit_arguments.<locals>.visit_defaults   sS    8::::26/$ )D'

4() 38/%/s   < < 	A)
	itertoolschainposonlyargsrm   vararg
kwonlyargsrc   kw_defaultskwargrs   )r0   rB   ru   rn   s   `   r3   visit_argumentsz"DependenciesFinder.visit_arguments   s    	8 ??4#3#3TYYQUQ\Q\bdfjfufuv 	CJJsO	 	t''(::!JJtzz"t}}%r4   c                    | j                  |      }t        |t              r| xj                  t	        |      z  c_        y | j                  j                  |       y r6   )rc   r$   r   r]   setadd)r0   rB   targets      r3   visitAssnTargetz"DependenciesFinder.visitAssnTarget   sE     D!fd#F+  (r4   c                    t        |j                        dk7  rt        d      | j                  |j                  d          | j	                  |       y )N   z2Simultaneous multiple assignment is not supported.r   )r   targets	TypeErrorr   ro   r0   rB   s     r3   visit_AssignzDependenciesFinder.visit_Assign   sG    t||!
 PQQT\\!_- 	4 r4   c                \    | j                  |j                         | j                  |       y r6   r   r   ro   r   s     r3   visit_AnnAssignz"DependenciesFinder.visit_AnnAssign   $    T[[) 	4 r4   c                \    | j                  |j                         | j                  |       y r6   r   r   s     r3   	visit_ForzDependenciesFinder.visit_For   r   r4   )returnNone)rK   r;   __qualname____doc__r&   propertyr9   rD   rU   r`   re   rk   rp   r}   r   r   r   r   __classcell__r2   s   @r3   r   r      sZ    	"0H ' '09$<6
!
&@)!!!r4   r   c                t    t        | t              r| j                  S t        | t              r| S t	        |       S r6   )r$   rX   rK   rN   repr)tys    r3   _normalize_tyr      s.    "d{{	B		8Or4   c                      e Zd ZdZ	 	 ddZed        Zed        Zed        Zed        Z	ed        Z
ed        Zed	        Zy
)KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.c                <    || _         || _        || _        || _        y r6   )num_paramdo_not_specializedo_not_specialize_on_alignment)r0   r   paramr   r   s        r3   r&   zKernelParam.__init__   s     !2.L+r4   c                .    | j                   j                  S r6   )r   r'   r8   s    r3   r'   zKernelParam.name   s    {{r4   c                    | j                   j                  r1| j                   j                  t        j                  j                  k(  ryt        | j                   j                        S )Nr<   )r   
annotationr=   	Parameteremptyr   r8   s    r3   r   zKernelParam.annotation   sD    {{%%)?)?7CTCTCZCZ)ZT[[3344r4   c                    | j                   }dD ]4  \  }}||j                  |      t        |      z   d  }|s)||v s.| | c S  |dk(  ryy)N))uintu)r   iboolu1r<   )r   findr   )r0   r   ty1ty2widths        r3   annotation_typezKernelParam.annotation_type   se    __
5 	'HCzs3c#h>?@E
*ug&	' r4   c                    d| j                   v S N	constexpr)r   r8   s    r3   is_constexprzKernelParam.is_constexpr  s    doo--r4   c                <    d| j                   v xr | j                   S )Nconst)r   r   r8   s    r3   is_constzKernelParam.is_const
  s    $//)C$2C2C.CCr4   c                .    | j                   j                  S r6   )r   defaultr8   s    r3   r   zKernelParam.default  s    {{"""r4   c                d    | j                   j                  t        j                  j                  k7  S r6   )r   r   r=   r   r   r8   s    r3   has_defaultzKernelParam.has_default  s#    {{""g&7&7&=&===r4   N)r   r   r   zinspect.Parameterr   r   r   r   )rK   r;   r   r   r&   r   r'   r   r   r   r   r   r   r    r4   r3   r   r      s    LM15M     5 5
   . . D D # # > >r4   r   c                 l     t         r	t         d   S ddlm  d fd	t         j                         S )Nr   )r   c                     yt         t              rd j                  fS t               rd fS t         t              ryt         t              r7|r | d|      nd } dk(  r|ryd k  r	 d	k  rd
|fS d k  r	 dk  rd|fS d|fS t         t
              ryt         d      ryt         t              rQ D cg c]  } ||       }} fd} ||D cg c]  }|d   	 c}      }	 ||D cg c]  }|d   	 c}      }
|	|
fS  j                  |f}t        j                  |d       }|:|d   rdndt        t        |d         j                  d      d      z   }|t        |<   |r | d|      nd }||fS c c}w c c}w c c}w )N)r   Nr   )i1Nr   )alignr   )r   r   i   ii32l            l    u64i64)fp32Ntma_desc_cpu_ptr)	nvTmaDescNc                N    t        d      r t              |  S t        |       S )N_fields)hasattrrX   tuple)valsrn   s    r3   <lambda>zAcreate_specialize_impl.<locals>.specialize_impl.<locals>.<lambda>;  s&    '#y:Qid3i&6 W\]aWb r4   r   z*k*.r   tensor)r$   rH   rM   r   r   r    r   r   dtype	dtype2strr^   type_canonicalisation_dictrN   split)rn   specialize_extrar   specialize_valuer   keyxspec
make_tupletysrI   dskresr   specialize_impls   `            r3   r   z/create_specialize_impl.<locals>.specialize_impl!  s   ;&[)//Y'%%T"S!?O"3U;UYCax,'SSI%5s|###"2s|#s|#U#!S,-&U#BEFQOA'78FDFbJD1qad12CT2qt23D; 99h'C--T*C{"1vt32LSQTUVQW[M^M^_bMcdfMg2hh!$	#BR"3>X\C: G12s   >E>F4F)FTT)specialize_impl_cachelanguager   append)r   r   s   @@r3   create_specialize_implr     s0    $R(($&P   1r4   c                4    t               } || d |      d   S )Nc                     y r6   r   )rQ   kwargss     r3   r   zmangle_type.<locals>.<lambda>O  s    r4   )r   r   )r   )rn   
specializer   s      r3   mangle_typer   M  s     ,.O3 8:VWXYYr4   c                       e Zd ZU ded<   ddZy)KernelInterfacer   runc                      fdS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                 .     j                   | dd|S )NFgridwarmup)r   )rm   r   r   r0   s     r3   r   z-KernelInterface.__getitem__.<locals>.<lambda>[  s    xtxx$T%'YRX'Y r4   r   )r0   r   s   ``r3   __getitem__zKernelInterface.__getitem__U  s     ZYr4   N)r   r   )rK   r;   r   __annotations__r   r   r4   r3   r   r   R  s    	
FZr4   r   c           
        |j                         D ci c],  \  }}||j                  j                  dk(  rt        |      n|. }}}dd l}| ||j                         D cg c]  }t        |       c}t        |j                               |j                         D cg c]  }t        |       c}t        |j                               |j                  d}	|j                  |	      }
|
S c c}}w c c}w c c}w )Nr   r   )r'   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionsr   )
itemsr2   rK   rN   jsonrI   r   values__dict__dumps)r'   r   	constantsattrsr   r   rg   r   r   objserialized_objs              r3   serialize_specialization_datar  _  s    enetetevwWaWZ\aEOO$<$<$Gc%jURwIw9QZQ_Q_Qa?bAQ?bY %**,0OQa0O_cdidpdpdr_s##CC
 ZZ_N x @c0Os   1C, C2C7c                X   t        | j                        t        |      k(  sJ g }t        | j                  j                         |      D ]  \  }}|j                  r|j                  d| d       (|j                  rdnd}|j                  rdnd}|j                  rdnd}d| d| d| d| d	}	|j                  r#|j                  d|j                   d	|	 d
       |j                  |	         d }
ddj                  t        t        |
| j                  j                                     dgz          ddj                  | j                  j                         D cg c]
  }d| d|  c}       ddj                  |       d}| j                  j                         D ci c];  \  }}|j                  t        j                   j"                  urd| |j                  = }}}t$        |d<   t'               |d<   |j(                  |d<   t+        ||       |d   S c c}w c c}}w )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    z("constexpr", )TrueFalsezspecialize_impl(z, specialize_extra, , z("z",) + z[1:]c                x    | d   j                   t        j                  j                  u r| d   S | d    d| d    S )Nr   r   z	=default_)r   r=   r   r   )r   s    r3   r   z0create_function_from_signature.<locals>.<lambda>  sA    AaDLLG,=,=,C,CCAaD AaD6QZ[\]^[_Z`Ia r4   z
def dynamic_func(z	**optionsz):
    params = {'z': z}
    specialization = [,z-]
    return params, specialization, options
default_rH   r   r   dynamic_func)r   
parametersziprI   r   r   r   r   r   r   joinr   mapr   r   r=   r   r   rH   r   get_arg_specializationexec)sigkparamsbackendspecializationr'   kpr   r   r   r9   rn   	func_bodyr   func_namespaces                 r3   create_function_from_signaturer  k  s4    s~~#g,...N++-w7 0b??!!N4&":;!#v'H$&$8$8fJ!@@GfE$TF*>xj:,VXY^X__`aC!!%%2+=+=*>fSE&NO%%/0 bC))DS#..*>*>*@!ABk]RST U		3>>;N;N;PQ4QtfCv.QRS Txx/0 1I >>//1D%== 1 1 7 77 4&5==(N  %0N=!(>(@N$%)0)G)GN%& 	N# .))' R
s    H!(A H&r   r   
float8e4nvfp8e4nvfloat8e5fp8e5float8e4b15fp8e4b15float8_e4m3fn
float8e4b8fp8e4b8float8_e4m3fnuzfloat8_e5m2float8e5b16fp8e5b16float8_e5m2fnuzfloat16fp16bfloat16bf16float32r   float64fp64int8i8int16i16int32r   r   u8u16u32r   )int64uint8uint16uint32uint64c                       e Zd ZdZdZd Zd Zd Zd Zd Z		 	 ddZ
ed        Zd	 Zd
 Zd Zd Z fdZ fdZd Z xZS )rH   Nc	                   |rt         j                  nt         j                  }	|	y| j                  j                  }
| j                  j
                  }dj                  t        | j                  |d         D cg c]  \  }}|j                   d|  c}}      }|
 d|j                   d|j                   d|j                   d|j                   d	|j                   d
| d} G d d      }t        |
|||d   ||      }||||j                  |j                  |j                  |j                  |j                  |j                   |||d} |	|| |||
|       d|i||d      S c c}}w )NFr  r   z: z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r  c                      e Zd Zd Zy)/JITFunction._call_hook.<locals>.JitFunctionInfoc                .    || _         || _        || _        y r6   )rC   r'   jit_function)r0   rC   r'   rD  s       r3   r&   z8JITFunction._call_hook.<locals>.JitFunctionInfo.__init__  s    $ 	$0!r4   N)rK   r;   r   r&   r   r4   r3   JitFunctionInforB    s    r4   rE  r   )r   devicer   	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr   )r   r   fncompileis_manual_warmupalready_compiled)rH   
cache_hookcompiled_hookrP  rK   r;   r  r  paramsr'   rG  rH  rI  rJ  rK  r  rL  )r0   r   r   rF  r   r   rM  rO  beforehookr'   rC   r   r   	arg_reprsr   rE  rN  r   s                      r3   
_call_hookzJITFunction._call_hook  s    *0{%%[5N5N<ww##IIc$++WZ[\W]F^_%**Rt4_`	{7#4#4"5[AQAQ@RR_`g`r`r_s  tG  HO  H`  H`  Ga  a{  |C  |[  |[  {\  \^  _h  ^i  ij  k	 	 <D)YX_`aXbdkmpq #" **((!,, ' 8 8'.'F'F"..#6"
 vtT2C*6*&"
 	
9 `s   9E'
c                T    t        |      sJ | j                  j                  |       y)z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)callablepre_run_hooksr   )r0   rX  s     r3   add_pre_run_hookzJITFunction.add_pre_run_hook  s$    
 ~~!!$'r4   c                    ddl m}m}m}m} t
        j                  j                         } ||      }|| _        || _        || _        t        | j                  | j                  |      }i |||fS )z1
        Precompute as much as possible.
        r   )CompiledKernelrQ  	ASTSourcemake_backend)compilerr`  rQ  ra  rb  r   activeget_current_targetr  r   rV  )r0   r`  rQ  ra  rb  r   r  binders           r3   create_binderzJITFunction.create_binder   sb     	PO113v&,"/WU67F**r4   c                  |j                  d| j                        xs# t        j                  j                  dd      dk(  |d<   t        j
                  j                         }t        j
                  j                  |      }| j                  D ]
  } ||i |  | j                  |   \  }}	}
} ||i |\  }}}t        |      t        |      z   }|j                  |d       }||
j                  |      }| j                  D cg c]  }|j                   }}|D cg c]  }|d   	 }}t        ||      D ci c]  \  }}||
 }}}d|vsJ d       d|vsJ d	       d
|vsJ d       |D ]#  }||j                  vs||vst!        d|z         t#        |d       }|D ci c]&  }|t%        t'        |j)                               |      ( }}|D cg c]  }|d   	 }}t#        |d       }|D ci c]  }||
j+                  t%        ||              }}| j-                  ||||||g|d      ry | j/                  | |||      }| j1                  ||	|j                        }|||<   | j-                  ||||||g|d       t3               }| j4                  j7                         D ]6  \  \  }}\  }} | j                  ||      x}!|k7  s$t9        d| d| d|!        |s|J t;        |      r ||      }t=        |      }"|d   }#|"dkD  r|d   nd}$|"dkD  r|d   nd}% |j>                  ||g|j)                          }& |j@                  |#|$|%||jB                  |jD                  |&| jF                  jH                  | jF                  jJ                  g	|j)                           |S c c}w c c}w c c}}w c c}w c c}w c c}w )NdebugTRITON_DEBUG01r   device_typez=device_type option is deprecated; current target will be usedrF  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                    |dk(  S r   r   )rQ   r_   s     r3   r   z!JITFunction.run.<locals>.<lambda>/  s    sk?Q r4   r   c                "    t        |t              S r6   )r$   rN   )rQ   r   s     r3   r   z!JITFunction.run.<locals>.<lambda>3  s    As9K r4   T)rW  )r   r   FrF   z1 has changed since we compiled this kernel, from z to r   )&r^   ri  osenvironr   rd  get_current_deviceget_current_streamr]  device_cachesrN   parse_optionsrV  r'   r  r   KeyErrorr   r   r   r   
parse_attrrZ  ra  rQ  objectr.   r   rJ   r\  r   launch_metadatar   functionpacked_metadatar`  launch_enter_hooklaunch_exit_hook)'r0   r   r   rm   r   rF  rn  rX  kernel_cacher   r  rf  
bound_argsr  r   r   kernelr   sigkeyssigvalsrO   vr   
constexprspathattrvalsr   r1   not_presentr'   rQ   r_   globals_dictnewVal	grid_sizegrid_0grid_1grid_2rz  s'                                          r3   r   zJITFunction.run  s'    **Wdjj9gRZZ^^N\_=`dg=gw 11311&9 && 	"D$!&!	" 150B0B60J-fgv.4d.Ef.E+
NG .!CL0!!#t, >++F3G'+{{3!qvv3G3%34qt4G4,/,AB&1aABIB .o0oo.6)e+ee)6)e+ee) ]G,,,'1A"#WZ[#[\\] 'w0QRJ_ijW[$ 1$z7H7H7J2KT RRjJj&45!5H5!(,KLETYZqQ**+<Xq+IJJZEZsIvz7UGU[dhi..y*eDC\\#fg>N>N\OF &LOOCFJ%RXafOg h.2.C.C.I.I.K 	q*IT1*\&**4==#E"&tf,]^a]bbfgmfnoq q	q
 ###~J'D	I!WF )AT!W1F )AT!W1F4f44T6XJDUDUDWXOFJJvvvvvH^H^&(;(;(M(MtObObOsOs-"))+- ] 44B k5Zs$   N.N3<N8&+N>O6#Oc                T    | j                   | j                  S | j                  |      S r6   )_repr_fn_name)r0   rQ   s     r3   r   zJITFunction.reprT  s"     $

 2t}}E

1Er4   c	           	        |r|ng }|r|ng }|| _         |j                  | _        || _        t	        j
                  |      | _        || _        || _        t	        j                  |      d   | _	        || _
        |j                  | _        || _        g | _        t        | j
                  j                   j#                               D ]T  \  }	}
|	|v xs |
j$                  |v }|	|v xs |
j$                  |v }| j                  j'                  t)        |	|
||             V t+        j,                  t	        j.                  |            }|t1        j2                  d|t0        j4                        j7                         d  }| j9                  |       t;        | j<                        | _        d | _         i | _!        d | _"        || _#        || _$        | j                  D cg c]  }|j$                   c}| _%        | j                  D cg c]  }|jL                  s|jN                   c}| _(        g | _)        |jT                  | _*        |j                  | _        |jV                  | _+        |j                  | _        y c c}w c c}w )Nr   z^def\s+\w+\s*\(),rP  r;   rC   versionr=   r   r   r   getsourcelinesstarting_line_numberr  rK   r  rz  rV  	enumerater  r   r'   r   r   textwrapdedent	getsourceresearch	MULTILINEstart_unsafe_update_srcr   rg  ru  hashr.   r  ri  rG   	arg_namesr   r   r  r]  r   __globals__)r0   rP  r  r   r   ri  rG   r   rz  r   r   dnsdns_oar1   ps                  r3   r&   zJITFunction.__init__W  s   1B-Ki)Goq&mm **2.!2.L+$+$:$:2$>q$A!
.!$..";";"B"B"DE 	CHAu((KEJJ:K,KC88hEJJJh<hFKK{1eS&AB	C oog//34")).R\\BHHJKL$(););<	 TV 
  +/++6Q!&&6*.++HQ155H   zz>>-- 7Hs   I2>I7I7c                x   | j                   t        | j                  | j                  | j                        }|j                  | j                                |j                  t        | j                        z   | _         t        t        |j                  j                                     | _        | j                   S )N)r'   r,   r1   )r  r   rK   r  r1   rc   parser9   rN   r  dictsortedr.   r   )r0   dependencies_finders     r3   rM   zJITFunction.cache_key  s     99"4$--QUQaQagkgogo"p%%djjl3+//#d6O6O2PPDI$(0C0T0T0Z0Z0\)]$^D!yyr4   c               \     | j                   t        t        j                  |      |dd|S )NTr   )r   r  
MockTensor
wrap_dtype)r0   r   rm   r   s       r3   r   zJITFunction.warmup  s*    txxZ5J5JD1QT$\U[\\r4   c           	     `   ddl m}m} dd l}dd lm} t        j                  j                         }|j                  |      }|d   | j                  j                  k7  r(t        d|d    d| j                  j                         t        t        |d         }|d   }	t        ||	      D 
ci c]4  \  }
}|
|j                   j#                  |      r|j!                  |      n|6 }}
}t        t        |d	         }|d
   }t%        t        ||            }t%        |d   j'                               } || |||      }|d   j'                         D 
ci c]#  \  }
}|
t)        |t*              rt        |      n|% }}
}|d   }
 ||d |      }|| j,                  |   d   |
<   |S c c}}
w c c}}
w )Nr   )rQ  ra  r   r'   zSpecialization data is for z but trying to preload for r   r   r   r   r   r   r   )rc  rQ  ra  r   triton.languager   r   rd  rs  loadsrP  rK   rJ   r  r   r  r   is_dtyper  r   r$   r   ru  )r0   rN  rQ  ra  r   tlrF  deserialized_objr   r   r   rg   r   r   r   r   r   r1   r   r  s                       r3   preloadzJITFunction.preload  s   1$113::&9:F#tww'7'77-.>v.F-GGbcgcjcjcscsbtuw wE#3O#DE(9 "-?
U BHH$5$5e$<%%G
	 
  0 >?
%l3
SZ01)+6<<>?	iE: /y9??A
U E4!8ueC
 
 u%dG,-36"1%c*!

s   -9F$(F*c                   t        j                  | j                        }t        |t         j                        sJ t        |j                        dk(  sJ t        |j                  d   t         j                        sJ |S )Nr   r   )rZ   r  r1   r$   Moduler   bodyFunctionDef)r0   trees     r3   r  zJITFunction.parse  s_    yy"$

+++499~"""$))A,888r4   c                    t        d      )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)rJ   )r0   rm   r   s      r3   __call__zJITFunction.__call__  s    WXXr4   c                V    |dk(  rt        d| d      t        t        |   ||       y )Nr1   zCannot set attribute 'zX' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorr%   rH   __setattr__)r0   r'   rg   r2   s      r3   r  zJITFunction.__setattr__  s<    5= #9$ @, "- . . 	k4,T59r4   c                4    d| _         t        | 	  d|       y)z
        The only method allowed to modify src.
        Bypasses the __setattr__ restriction by calling super().__setattr__ directly.
        Nr1   )r  r%   r  )r0   new_srcr2   s     r3   r  zJITFunction._unsafe_update_src  s    
 	E7+r4   c                P    d| j                    d| j                  j                   dS )NzJITFunction(:r  )rC   rP  rK   r8   s    r3   __repr__zJITFunction.__repr__  s&    dkk]!DGG,<,<+=Q??r4   )NNNNNNN)rK   r;   r   rT  rU  rZ  r^  rg  r   r   r&   r   rM   r   r  r  r  r  r  r  r   r   s   @r3   rH   rH     sw    J M4
l(+ENF mq;?;(z  ]@Y:,@r4   rH   c                     y r6   r   )rP  s    r3   jitr    s    r4   r  r   rz  r   r   ri  rG   c                     y r6   r   r  s          r3   r  r    s     r4   c               @    dfd}|  ||       S |S )a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    c           
         t        |       sJ t        j                  dd      dk(  rddlm}  ||       S t        |       S )NTRITON_INTERPRETrk  rl  r   )InterpretedFunction)r  r   r   ri  rG   r   rz  )r\  rq  getenvinterpreterr  rH   )	rP  r  ri  r   r   rz  rG   r   r  s	     r3   	decoratorzjit.<locals>.decorator  sq    ||99'-48&r7N_Fdlq08tUdf f "3/M! /	 	r4   rP  r   r   zJITFunction[T]r   )	rP  r  r   rz  r   r   ri  rG   r  s	    ``````` r3   r  r    s&    : & 
~} r4   c                  F    e Zd ZdZed        Zd Zed        Zed        Zy)r  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                l    | j                   j                  dk(  r| j                  dk(  rt        |       S | S )Nr   torch)r2   rK   r;   r  )rn   s    r3   r  zMockTensor.wrap_dtype9  s.    ==!!W,71Jc?"
r4   c                    || _         y r6   )r   )r0   r   s     r3   r&   zMockTensor.__init__?  s	    
r4   c                      yNr   r   r   r4   r3   data_ptrzMockTensor.data_ptrB      r4   c                      yr  r   r   r4   r3   	ptr_rangezMockTensor.ptr_rangeF  r  r4   N)	rK   r;   r   r   staticmethodr  r&   r  r  r   r4   r3   r  r  3  sH    
  
    r4   r  c                  J    e Zd Zd Zd Zd ZddZd Zd Zd Z	d Z
d	 Zd
 Zy)TensorWrapperc                    || _         || _        |j                  | _        |j                  | _        | j                  j                  | _        y r6   )r   basedatarF  shape)r0   r  r   s      r3   r&   zTensorWrapper.__init__M  s5    
	II	kkYY__
r4   c                6    | j                   j                         S r6   )r  r  r8   s    r3   r  zTensorWrapper.data_ptrT  s    yy!!##r4   c                4     | j                   j                  | S r6   )r  stride)r0   rm   s     r3   r  zTensorWrapper.strideW  s    tyy&&r4   c                <    d| j                    d| j                   dS )NzTensorWrapper[r@  r  )r   r  r8   s    r3   __str__zTensorWrapper.__str__Z  s    

|2dii[::r4   c                6    | j                   j                         S r6   )r  element_sizer8   s    r3   r  zTensorWrapper.element_size]  s    yy%%''r4   c                ^    t        | j                  j                         | j                        S r6   )r  r  cpur   r8   s    r3   r  zTensorWrapper.cpu`  s    TYY]]_djj99r4   c                N    | j                   j                  |j                          y r6   )r  copy_)r0   others     r3   r  zTensorWrapper.copy_c  s    		

#r4   c                ^    t        | j                  j                         | j                        S r6   )r  r  cloner   r8   s    r3   r  zTensorWrapper.clonef  s    TYY__.

;;r4   c                `    t        | j                  j                  |      | j                        S r6   )r  r  tor   )r0   rF  s     r3   r  zTensorWrapper.toi  s     TYY\\&14::>>r4   c                `    t        | j                  j                  |      | j                        S r6   )r  r  	new_emptyr   )r0   sizess     r3   r  zTensorWrapper.new_emptyl  s"    TYY007DDr4   Nr   rN   )rK   r;   r   r&   r  r  r  r  r  r  r  r  r  r   r4   r3   r  r  K  s5    %$';(:$<?Er4   r  c                    t        | t              r;|| j                  j                  k(  r| j                  S t        | j                  |      S t	        | d      rt        | |      S t        dt        |        d      )Nr  zCannot reinterpret a r   )r$   r  r  r   r   r   rX   )r   r   s     r3   reinterpretr  p  sk    &-(FKK%%%;; !e44		$VU++/V~Q?@@r4   c                d   | }t        |t              s|j                  }t        |t              s|j                  j                  j                  }t        j                  |j                        \  }}t        |      D ].  \  }}|j                         j                  d      s&||z  } ||fS  ||fS )Nzdef )
r$   rH   rP  __code__co_filenamer=   r  r  stripr@   )rP  base_fn	file_namelines
begin_lineidxlines          r3   get_jit_fn_file_liner     s    G+.** +.

##//I..wzz:E: u% 	T::<""6*#Jj  	 j  r4   r  )Fr  )r   Optional[Callable]rz  r  r   Optional[Iterable[int]]r   r  ri  Optional[bool]rG   r  r   zCallable[[T], JITFunction[T]]r6   )rP  zOptional[T]r   r  rz  r  r   r  r   r  ri  r  rG   r  r   z4Union[JITFunction[T], Callable[[T], JITFunction[T]]])9
__future__r   r   rZ   r(   r=   rv   rq  r  r  collectionsr   	functoolsr   typingr   r   r	   r
   r   r   r   r   r   r   runtime.driverr   typesr   _utilsr   r   rK   r   rA   r   NodeVisitorr   r   r   r   r   r   r   r   r  r  r   r   r   r  rH   r  r  r  r  r   r   r4   r3   <module>r     s   , 
    	 	  # % d d d #  5.3~../CL~! ~!L-> ->` 	 /dZ
	Zgaj 	Z	0*f
D)  :	
 Y ) y 7 : z v  v v D  U!" U#$ - 2 
(//1	2 &A$%q!&a@/!$ a@R	 
 
 
 #*.15>B #
 
 (	

 /
 %<
 
 
 #
 

 4 #*.15>B #44 	4
 (4 /4 %<4 4 4 :4x 0"E "EJA!r4   