
    nVh&H                       d dl mZ d dlZd dlZddlmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ d dlmZ d dlZd dlZd dlZd dlZdZdeiZdZ de iZ!d Z" G d d      Z# G d d      Z$ ejJ                         d        Z&d Z'd"dZ(d#dZ)d Z* G d d      Z+ G d de,      Z- G d  d!      Z.y)$    )annotationsN   )get_cache_invalidating_env_varsir)backends)	GPUTarget)__version__)OutOfResources)get_cache_managerget_dump_managerget_override_manager)driver)get_sass   )ast_to_ttir)Pathz=\.(?:visible|extern)\s+\.(?:entry|func)\s+(\w+)\s*\(([^)]*)\)ptxz\.param\s+\.(\w+)c                    t        j                  d|       }t        j                  d|       }|yt        j                  dd|       } |dt        |j	                  d            z   S | S )Nz!tt\.ptr<([^,]+)ztt.nv_tma_desc = 1	nvTmaDescz {[^}]+} *r   )researchsubconvert_type_reprgroup)xmatchtmas      H/home/dcms/DCMS/lib/python3.12/site-packages/triton/compiler/compiler.pyr   r   '   sd     II)1-E
)))1
-C

{B"A&u{{1~666H    c                  (    e Zd ZdddZd Zd Zd Zy)	ASTSourceNc                   || _         d| _        |j                  | _        || _        t               | _        |g|j                         D ]T  \  }}t        |t              r|j                  j                  |      fn|}t        |t              sJ || j                  |<   V |xs
 t               | _        t        | j                  t              rLt        | j                  j                  d            D ci c]  \  }}||j!                          c}}| _        y | j                  j#                         D ]  }t        |t              rt%        d       y c c}}w )Nttir,zSignature keys must be string)fnext__name__name	signaturedict	constantsitems
isinstancestr	arg_namesindextupleattrs	enumeratesplitstripkeys	TypeError)selfr'   r+   
constexprsr4   kvs          r    __init__zASTSource.__init__6   s   KK	"!"((* &11;As1CR\\''*-!!U+++$%q!& _df
dnnc*7@AUAUVYAZ7[\tq!al\DN^^((* E!!S)#$CDDE ]s   3Ec           	        t        | j                  j                               D cg c]  \  }}|	 }}}d }dj                  t        | j                  j                               D cg c]  \  }} ||       c}}      }| j
                  j                   dt        | j                         d| d| }t        j                  |j                  d            j                         S c c}}w c c}}w )Nc                H    t        | d      r| j                  S t        |       S )N	cache_key)hasattrrA   r0   )r   s    r    <lambda>z ASTSource.hash.<locals>.<lambda>K   s    71k+BAKK A r!   -utf-8)sortedr+   r.   joinr-   r'   rA   r0   r4   hashlibsha256encode	hexdigest)r:   r<   r=   
sorted_sigget_keyconstants_keykeys          r    hashzASTSource.hashI   s    $*4>>+?+?+A$BCDAqaC
CN@T@T@V9W!XA'!*!XY""#1S_$5Qzl!M?S~~cjj12<<>>	 D!Xs   C**C0
c                8    t        | j                  | ||||      S )N)contextoptionscodegen_fns
module_map)r   r'   r:   rS   rT   rU   rR   s        r    make_irzASTSource.make_irP   s!    477D'7Xc&02 	2r!   c                    t               S N)r,   r:   s    r    parse_optionszASTSource.parse_optionsT   s	    vr!   NNreturnNoner)   
__module____qualname__r>   rP   rW   r[    r!   r    r#   r#   4   s    E&?2r!   r#   c                  $    e Zd Zd Zd Zd Zd Zy)IRSourcec                   || _         t        |      }|j                  dd  | _        |j	                         | _        t        j                  |       |j                  |       | j                  dk(  rt        j                  t        | j                     | j
                  t        j                        }|j                  d      | _        |j                  d      }t        j                  t        | j                     |      }t!        |      D ci c]  \  }}|t#        |       c}}| _        y t        j&                  | j                   |      | _        | j(                  j+                         }	d|	z   | _        | j(                  j-                  |	      }
| j(                  j/                  |
      }t!        |      D ci c]  \  }}||
 c}}| _        y c c}}w c c}}w )Nr   r   r   @)pathr   suffixr(   	read_textsrcr   load_dialectsr   r   prototype_pattern	MULTILINEr   r*   findallarg_type_patternr5   r   r+   parse_mlir_modulemoduleget_entry_func_nameget_functionget_function_signature)r:   rh   rR   backendr   r+   typesr<   tyfn_namefuncOpfunc_tys               r    r>   zIRSource.__init__Z   sc   	Dz;;qr?>>#
!g& 88uII/9488R\\READIAIJJ/99EEDMeDTU51ba!22!66UDN..tyy'BDKkk557GgDI[[--g6Fkk88@G1:71CD2aeDDN V Es   G:Gc                z    t        j                  | j                  j                  d            j	                         S )NrE   )rH   rI   rk   rJ   rK   rZ   s    r    rP   zIRSource.hashr   s'    ~~dhhoog67AACCr!   c                <    || j                   _        | j                   S rY   )rr   rR   rV   s        r    rW   zIRSource.make_iru   s    %{{r!   c                    | j                   dk(  r(| j                  j                  d      }|J d       d|iS t               S )Nttgirzttg.num-warpsz'Unable to parse ttg.num-warps attribute	num_warps)r(   rr   get_int_attrr,   )r:   r   s     r    r[   zIRSource.parse_optionsy   sF    88w00AI(S*SS(++vr!   Nr`   rc   r!   r    re   re   X   s    E0Dr!   re   c                    dd l } t        j                  j                  t        j                  j                  t        j                  j	                  t
                          }g }t        t
        d      5 }|t        j                  |j                               j                         gz  }d d d        t        j                  j                  |d      dft        j                  j                  |d      dfg}|D ]  \  }}| j                  |g|      D ]y  }t        |j                  j                  |j                        j                   d      5 }|t        j                  |j                               j                         gz  }d d d        {  t        j                         }t#        j$                  d      j'                  d	      d
   }	t        t        j                  j                  |dd|	       d      5 }	 |j                  d      }
|
sn|j)                  |
       &	 d d d        |j+                  |j                                t        j                  j                  |d      }| j                  |gd      D ]y  }t        |j                  j                  |j                        j                   d      5 }|t        j                  |j                               j                         gz  }d d d        { t,         dj                  |      z   S # 1 sw Y   jxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)Nr   rbcompilerztriton.compiler.r   ztriton.backends.)prefix
EXT_SUFFIX._Cz
libtriton.i   languageztriton.language.rD   )pkgutilosrh   dirnameabspath__file__openrH   rI   readrK   rG   walk_packagesmodule_finder	find_specr*   origin	sysconfigget_config_varr6   updateappendr	   )r   TRITON_PATHcontentsfpath_prefixesrh   r   liblibtriton_hashr(   chunklanguage_paths               r    
triton_keyr      s   ''//"''//"''//(2K"LMKH	h	 ;W^^AFFH-779::; 
k:	.0BC	k:	.0BCM & Cf(($(? 	CCc''11#((;BBDI CQW^^AFFH5??ABBC C	CC ^^%N

"
"<
0
6
6s
;B
?C	bggll;
3%.@A4	H )AFF7OE!!%(	  	) OON,,./GGLLj9M$$m_=O$P ?###--chh7>>E 	?1;;=>>H	? 	?? ]chhx0007; ;C C) )	? 	?s0   46L6L9(L,96L9LL),L69M	c                    |dk(  s|dk(  rt        j                  | |      }||_        |S |dk(  s
|dk(  s|dk(  rt        |       j	                         S |dk(  s|dk(  rt        |       j                         S y )Nr%   r   llirr   amdgcncubinhsaco)r   rq   rR   r   rj   
read_bytes)	full_namer(   rR   rr   s       r    parser      sx    
f}w%%i9 
f}uxI((**
g~I))++ (r!   c                *   t        j                  dd      dk(  ry| j                  t        | j                         | j                  t        | j                         ddg}|D cg c]"  }|j                  dt         j                        $ }}| j                  g }4t        fd|D              s|j                         j                  4t        ||d	d       D ]  \  }}||_
         |sd| _        yd|d
   _
        |d   | _        yc c}w )z
    Removes code_generator.py and related files from tracebacks.

    These are uninteresting to the user -- "just show me *my* code!"
    TRITON_FRONT_END_DEBUGGING01Nz"/triton/compiler/code_generator.pyz/ast.py/c              3     K   | ]6  }j                   j                  j                  j                  |      s3| 8 y wrY   )tb_framef_codeco_filenameendswith).0r   tbs     r    	<genexpr>z#filter_traceback.<locals>.<genexpr>   s/     V2;;+=+=+I+I+R+RST+U1Vs   4??r   r   r   )r   getenv	__cause__filter_traceback__context__replacesep__traceback__anyr   tb_nextzip)e	BAD_FILESbad_fileframes	cur_frame
next_framer   s         @r    r   r      s    
yy-s3s:{{%}} ' 	-I @II8!!#rvv.III	
BF
.ViVVMM"ZZ .
 $'vvabz#: 'J&	' !r
 )! Js   &'Dc                	   |t         j                  j                         }t        |t              sJ d       t        |      }t        | t               }|r8t        | t              sJ d       t        j                         }t        | ||      } | j                         }|j                  t        |xs
 t               fi |      }t               }t                d| j                          d|j                          d|j                          dt        t!        |j#                                      	}t%        j&                  |j)                  d            j+                         }	t-        |	      }
t.        j0                  j3                  dd      dk(  }t.        j0                  j3                  dd      dk(  }t.        j0                  j3                  d	d      dk(  }|rt5        | j                               nd }|rt7        | j                               nd }| j8                  d d
 }| d}|
j;                  |      xs i }|j3                  |      }t.        j0                  j3                  dd      dk(  }|s|t=        | ||	      S |	|d|j>                  |}t@        |d<   t               }|jC                  ||       tE        |jG                               jI                  | jJ                        }|r|dz  }t        | t              s:t        j                         }t        jL                  |       |jM                  |       |jO                  |      }|jQ                         }	 | jS                  |||      }t.        j0                  j3                  dd       }tE        |j#                               |d  D ]  \  }} |||      }| d| } |.|jY                  |       x}!t[        d|!        t]        |!||      }|r|dv r|
j_                  ||       || <   ||j_                  ||        ||k(  r0|
jY                  |       }"|ja                  |"       t[        d|"        |} |
j_                  tc        jd                  |tf              |d      ||<   |
ji                  ||       t.        j0                  j3                  dd      dk(  s|jk                          t=        | ||	      S # tT        $ r}tW        |        d }~ww xY w)Nz target must be of GPUTarget typez'source must be either AST or a filepathrD   rE   TRITON_KERNEL_OVERRIDEr   r   TRITON_KERNEL_DUMPTRITON_STORE_BINARY_ONLY   .jsonTRITON_ALWAYS_COMPILE)rP   targettriton_versionr   
USE_IR_LOCr   z
Overriding kernel with file )r   r   jsonzCreating new locations for )defaultF)binaryTRITON_ENABLE_ASAN)6r   activeget_current_targetr/   r   make_backendr#   r0   r   rR   re   r[   r,   r   r   rP   rF   r.   rH   rI   rJ   rK   r   r   environgetr   r   r*   	get_groupCompiledKernel__dict__r	   
add_stageslistr8   r2   r(   rl   get_codegen_implementationget_module_maprW   	Exceptionr   get_fileprintr   putcreate_location_snapshotr   dumpsvars	put_groupdisable_multithreading)#rk   r   rS   rv   	ir_sourcerR   extra_optionsenv_varsrO   rP   fn_cache_managerenable_overrideenable_ir_dumpstore_only_binaryfn_override_managerfn_dump_manager	file_namemetadata_filenamemetadata_groupmetadata_pathalways_compilemetadatastagesfirst_stagerT   rU   rr   r   
use_ir_locr(   
compile_irnext_moduleir_filenamer   ir_full_names#                                      r    compiler     s   ~113fi(L*LL(6"GsI..I#s#N%NN#**,sGW-%%'M##D):DF$Lm$LMG.0H\N!CHHJ<q(8',,.9I3vV^VdVdVfOgKhJi
jC>>#**W-.88:D(. jjnn%=sCsJOZZ^^$8#>#EN

'A3G3N>M.sxxz:SW6D&sxxz2$O
 #I$+U+%//0ABHbN"&&'89MZZ^^$;SASHNm7c>488  

 	H "-HVFvw'v{{}%++CGG4Kq c8$**,
!g&44W=K'')JWk:wG d3J/= Z 2"1SE*+>Q>Z>Z[f>g1g0t29+>?	38K!s.H'H*:*>*>{K*XN;'&[9+44[AL00>/~>?!$ )9(<(<TZZZ^=_arDI )= )KN$%0.A ::>>.4;&&(#~t44K  s   S 	S6%S11S6c                   t        j                         D cg c]*  }|j                  j                  |       s|j                  , }}t	        |      dk7  r't        t	        |       d| j                   d| d       |d   |       S c c}w )Nr   z! compatible backends for target (z) (z). There should only be one.r   )r   valuesr   supports_targetlenRuntimeErrorrv   )r   r   activess      r    r   r   ?  s    #+??#4[a

8R8RSY8Zqzz[G[
7|q7|n=fnn=MSQXPYYuvx 	x71:f	 \s
    BBc                       e Zd Zd ZddZd Zy)LazyDictc                     || _         g | _        y rY   )dataextras)r:   r  s     r    r>   zLazyDict.__init__I  s    	r!   c                    | j                   D ]  \  }}| j                   || z  | _         | j                   j                          | j                  S rY   )r  r  clearr:   funcargss      r    r   zLazyDict.getM  sG    ++ 	0JD$		D$K/DI	0yyr!   c                >    | j                   j                  ||f       y rY   )r  r   r  s      r    addzLazyDict.addS  s    D$<(r!   Nr]   )r)   ra   rb   r>   r   r  rc   r!   r    r	  r	  G  s    )r!   r	  c                      e Zd Zd Zy)AsmDictc                T    |dk(  rt        | d         }nt        d|z        || |<   |S )Nsassr   zUnknown key: '%s')r   KeyError)r:   rO   values      r    __missing__zAsmDict.__missing__Y  s6    &=T']+E.455S	r!   N)r)   ra   rb   r  rc   r!   r    r  r  W  s    r!   r  c                  >     e Zd ZdZdZd Zd Z fdZd Zd Z	 xZ
S )r   Nc           	        ddl m} t        d |j                         D              }t	        j
                  |j                               }t        |d         |d<   |d   }t        |d   |d   |d         |d<    |d	t        t        |j                                           } |di || _        t        | j                  j                        }	|	j                  | j                        | _        || _        || _        | j                  j&                  | _        |j                         D 
cg c]"  \  }
}|
j)                  d
      rt+        |      $ }}
}|	j,                  }t/        |D ci c]B  }|j0                  dd  |j0                  dd  |k(  r|j3                         n|j                         D c}      | _        | j4                  |   | _        d | _        d | _        y c c}}
w c c}w )Nr   )
namedtuplec              3  \   K   | ]$  \  }}|j                  d       st        |       & yw)r   N)r   r   )r   cps      r    r   z*CompiledKernel.__init__.<locals>.<genexpr>m  s$     `$!QAJJW^L_d1g`s   ,,cluster_dimsr   rv   arch	warp_sizeKernelMetadatar   r   rc   )collectionsr  nextr.   r   loadsrj   r3   r   rF   r   r8   r   r   r   pack_metadatapacked_metadatark   rP   r*   r   r   
binary_extr  ri   r   asmkernelrr   function)r:   rk   r   rP   r  r   r   r   r$  rv   r  r   	asm_filesr*  files                  r    r>   zCompiledKernel.__init__k  s   *`.2F2F2H`a::m5578#(.)A#B (#&vi'8&.&Q\J]^#$4fT(--/=R6ST&22t}}334&44T]]C	MM&&	)7)=)=)?[AqzzRYGZT!W[	[''
!
 KKO$++ab/Z2OT__.UYUcUcUee
  hhz*  \
s   *GG)AG%c                B   | j                   y t        j                  j                         }t        j                  j	                  | j
                  | j                        | _        t        j                  j                  j                  |      d   }| j                  j                  |kD  r!t        | j                  j                  |d      t        | j                  d      rR| j                  j                  <d}| j                  j                  |kD  r!t        | j                  j                  |d      t        j                  j                  j                  | j                  | j                   | j                  j                  |      \  | _         | _        | _        | _        y )Nmax_shared_memzshared memory	tmem_sizei   ztensor memory)rr   r   r   get_current_devicelauncher_clsrk   r   runutilsget_device_propertiessharedr
   rB   r2  load_binaryr*   r,  r-  n_regsn_spills)r:   device
max_sharedmax_tmem_sizes       r    _init_handleszCompiledKernel._init_handles  s!   ;;"113==--dhhF]]((>>vFGWX
==*, !5!5z?SS4==+.4==3J3J3VM}}&&6$T]]%<%<m_]]AGATATA`A`IIt{{DMM$8$8&BB>T]DKr!   c                L    |dk(  r| j                          t        | 	  |      S )Nr5  )r?  super__getattribute__)r:   r*   	__class__s     r    rB  zCompiledKernel.__getattribute__  s&    5= w'--r!   c                   t         j                  y t        | j                  | j                  |d      }t        | j                  t              r | j                  j                  j                  |S i }d}t        | j                  j                  j                        D ]  \  }}||   ||<   |dz  } |j                  | j                  j                  j                  || j                  |f       |S )N)r*   r-  streamr   r   )r   launch_enter_hookr	  r*   r-  r/   rk   r#   r'   launch_metadatar5   r1   r  r   )	r:   gridrE  r  retarg_dictarg_idxiarg_names	            r    rG  zCompiledKernel.launch_metadata  s    ++3		t}}PVWX$((I.$((++2M2M2UJ$TXX[[%:%:; 	KAx!%gHXqLG	 	++dDMM8-LM
r!   c                <      j                          d d fd
}|S )N)rE  c                T   | =t         j                  j                         }t         j                  j                  |      }  j                  | g| } j
                  d   d   d   | j                  j                  |t        j                  t        j                  g	|  y )Nr   r   r   )r   r   r3  get_current_streamrG  r5  r-  r)  r   rF  launch_exit_hook)rE  r  r<  rG  rH  r:   s       r    runnerz*CompiledKernel.__getitem__.<locals>.runner  s    ~99;99&A2d224G$GODHHT!Wd1gtAwtG[G[]l#55~7V7V_Y]_r!   )r?  )r:   rH  rR  s   `` r    __getitem__zCompiledKernel.__getitem__  s    !% 	_ r!   )r)   ra   rb   rF  rQ  r>   r?  rB  rG  rS  __classcell__)rC  s   @r    r   r   d  s+     :B&.
r!   r   )r   BaseExceptionr\   )/
__future__r   rH   r   _C.libtritonr   r   r   backends.compilerr   r   r	   runtime.autotunerr
   runtime.cacher   r   r   runtime.driverr   tools.disasmr   code_generatorr   pathlibr   r   	functoolsr   r   ptx_prototype_patternrm   ptx_arg_type_patternrp   r   r#   re   	lru_cacher   r   r   r  r   r	  r,   r  r   rc   r!   r    <module>rc     s    "   >  )  . U U # # '  	  	  Y 	   , 	 

! !H& &R  1  1F,#$Lf5R) ) 
d 
U Ur!   