
    nVh                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ dZedk(  r ee      Zej%                  d	d
       ej%                  ddeddd       ej%                  ddedd       ej%                  ddedd       ej%                  ddedd       ej%                  ddedd        ej%                  d!d"ed#d$       ej%                  d%d&ed'd$       ej+                         Zej.                  rej.                  nej0                  Zej2                  rej2                  n ee      Z eej4                        Zej4                  j9                  d  eej:                               ej<                  j?                  ej@                  e      Z!ej<                  jE                  e!      Z#e!jH                  jK                  e#        e&e#ej0                        Z'ejP                  jS                  d(      Z( e*e(      dk(  sJ  e+ e,d) ejZ                  jS                  d(                  Z-d*e
e   fd+Z.d,ej^                   d-ej`                   Z1 e.e-e1gz         Z2d. Z3 e4e-      D  ci c]&  \  } }d/|v s| f e3|jS                  d/      d         ( c}} Z5e5jm                         D ci c]  \  }}|	|| c}}Z5 e4e-      D  ci c]  \  } }e'jn                  |     e3|       c}} Z8e8jm                         D ci c]  \  }}|	|| c}}Z8e5jm                         D ]   \  Z9Z:e:dk(  se:e8e'jn                  e9d       <   "  e4e-      D  ci c]'  \  } }e'jn                  |    |jS                  d/      d    ) c}} Z-e8D ]  Z9d0e-e9<   	 d1jw                  e8jy                         D cg c]
  } e|       c}      Z=e8jm                         D cg c]  \  }}| d2|  c}}Z>e>d3ej^                   d4ej`                   gz  Z>e5jy                         D ]  Z?e?d5v rJ d6e?         e5jm                         D ci c]  \  }}|d7k(  s|d8d7gg c}}Z@ej                  j                  e'e8e-e@9      ZCej^                  ej`                  d:ZD ej                  eCeD;      ZFeFj                  j                  d kD  r eId<      g Z7g ZJg ZKg ZL e4e'jn                        D ]  \  ZMZNeNe8vrKe7j                  eN       eJj                  e-eN          eKj                  eN       eLj                  e-eN          Ue5j                  eMfd      dk(  sle7j                  eN       eJj                  d=        dZQ e4e-jy                               D ]G  \  ZMZReQ eeM      z  ZQe5j                  eMfd      dk(  reQd>z  ZQe5j                  eMfd      d7k(  sCeQd?z  ZQI d@jw                  ee2eQg      ZSeFj                  dA   ZT e e j                  eT            dBdC ZVi dDeSdEej0                  dF e*eT      dGdHjw                   eWeVdddB   eVdddB         D cg c]  \  }}dI| |  c}}      d*dHjw                   eWeKeL      D cg c]  \  }} e|       dJ|  c}}      dKdHjw                   eWe7eJ      D cg c]  \  }} e|       dJ|  c}}      dLdHjw                  eKD cg c]  }dM| 	 c}dNgz         dO e*eK      dz   dPe>dQeFj                  j                  dRej^                  dSd@jw                  e=e1g      dTe(d    dUe(d   dVe(dB   dWdZYdXD ]  ZZ ee[      j:                  dYz  dZz  d[eZ z  Z\ej                  d\e2 d@eQ d\eZ       j                  d]      5 Z_e_j                    ee\      j                         j                  d^i eY       ddd        yyc c}} w c c}}w c c}} w c c}}w c c}} w c c}w c c}}w c c}}w c c}}w c c}}w c c}}w c c}w # 1 sw Y   xY w)_    N)ArgumentParser)Path)List)	ty_to_cppa  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
__main__)descriptionpathzTPath to Python source containing desired kernel in its scope. File will be executed.)helpz--kernel-namez-n zName of the kernel to compileT)typedefaultr
   requiredz--num-warpsz-w   z$Number of warps to launch the kernel)r   r   r
   z--num-stagesz-ns   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r   r
   r   z--gridz-gzLaunch grid of the kernel,c                 $    | j                  d      S )N )strip)ss    D/home/dcms/DCMS/lib/python3.12/site-packages/triton/tools/compile.py<lambda>r   F   s    1773<     	signaturec                     t        j                         }|j                  dj                  |       j	                                |j                         d d S )Nr      )hashlibsha256updatejoinencode	hexdigest)r   ms     r   hash_signaturer#   H   s?    NN	)$++-.{{}Ra  r   warpsxstagesc                 v    	 t        |       }|S # t        $ r Y nw xY w	 t        |       }|S # t        $ r Y y w xY w)N)int
ValueErrorfloat)r   rets     r   	constexprr+   P   sO    	a&CJ 			(CJ 		s    	, 	88:r+   x=z
num_warps=znum_stages=)r      z#Only 1 and 16 are valid hints, got r/   ztt.divisibility)fn
constexprsr   attrs)	num_warps
num_stages)optionszMAOT compiling kernels with global scratch requirements is not yet implementedi32cd_cubin   kernel_nametriton_kernel_namebin_sizebin_dataz, 0xr   full_signaturearg_pointers&z&global_scratchnum_argskernel_docstringsharedr3   	algo_infogridXgridYgridZ_placeholder)hr7   extracudazcompile..w )cbinasciir   importlib.util	importlibsysargparser   pathlibr   typingr   tritontriton.backendstriton.backends.nvidia.driverr   desc__name__parseradd_argumentstrr'   
parse_argsargsout_namer=   out_pathr	   arg_pathinsertparentutilspec_from_file_locationstemspecmodule_from_specmodloaderexec_modulegetattrkernelgridsplitlenlistmapr   r#   r3   r4   meta_sigsig_hashr+   	enumeratehintsitems	arg_names	constantskeyvaluer   values	const_sig
doc_stringrM   r2   compiler	ASTSourcesrcoptscompileccinfometadataglobal_scratch_sizeRuntimeError	arg_typesarg_names_not_1arg_types_not_1iarg_nameappendgetsuffixty	func_nameasmhexlifyhex_ziprG   paramsext__file__template_pathwith_suffixopenfpwrite	read_textformat)	r   r   kvr-   ynamer   args	   000000000r   <module>r      s      
 #     34 z -F
s  u
CJi!%  '
t#qGmn
CN  P
e#tJlm
dt.Y
t#<U`de
$S7R]abD $t}}43C3CH $t}}4>H DIIHHHOOAs8??+,>>11(--JD
..
)
)$
/CKKC S$**+F99??3Dt9>> S/1E1Ec1JKLI!$s) !
 t~~&gdoo->?Hi8*45H <EY;O\41aSVZ[S[aUIaggcl1o..\E#kkm=daq}QT=E?H?STtq!!!!$il2TI"+//"3E$!Qq}AEIkkm 8
UA:27If&&s1v./8 CLIBVW$!Q!!!$aggcl1o5WI %$	#%)*:*:*<=Q#a&=>I)2):;AQCq*;JZ/0K?P2QRRJ\\^ GG|FB1#FF|G6;kkmOdaqBwQ#R())OE
//
#
#v)y`e
#
fCtGDV^^C.F**Q.jkkIIOO !1!12 $89$X&Yx01""8,""9X#67YYud#q(X&U#$ F9++-. 2#a&99aUD!Q&cMF99aUD!R'cMF (Hf56I
**W
Cx$%a+Dyd.. 	CH 	DIIs4!9d14a4j7QRtq!A3qc{RS	
 	TYY#o_nJophdB9R=/4& 9pq 	$))sS\^gOh$i84	"av%>$ij 			"HQse9"HL]K^"^_ 	C(1, 	J 	&//(( 	T^^ 	SXXy(34 	a 	a 	a  	!F$  GX--7&@XcUCSS!!AhZq#"?@EEcJ 	GbHH;T-(224;;EfEF	G 	GGq l ]=TE X >;
 PH Sp$i"H	G 	Gsl   ``
``" `
`$`+,``-`$`*'`*(`0`6`<?a?6aa	