
    mVhJ                         d dl mZmZ d dlmZmZmZmZ d dlm	Z	 d dl
mZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ defd	Zd
 Z e	d       G d d             Z G d de      Zy)    )BaseBackend	GPUTarget)irpassesllvmamd)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     d S )Nc                      y)N   r   r    )lhsTyperhsTypes     L/home/dcms/DCMS/lib/python3.12/site-packages/triton/backends/amd/compiler.py<lambda>zmin_dot_size.<locals>.<lambda>   s        r   r   s    r   min_dot_sizer      s    --r   c                 F    | dk(  rdnd}t        j                  d|      dk(  S )Ngfx94210TRITON_HIP_USE_BLOCK_PINGPONG)osgetenv)archdefaults     r   is_pingpong_enabledr%      s'    X%c3G994g>#EEr   T)frozenc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed	<   dZ
eed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZee   ed<   dZee   ed<   dZeed<   dZee   ed<   dZeed<   dZeed<   dZeed <   dZeed!<   dZeed"<   dZ eed#<   d$Z!eed%<   d&Z"eed'<   d( Z#d) Z$y)*
HIPOptions   	num_warpsr   waves_per_eu   
num_stagesnum_ctasr   num_buffers_warp_specnum_consumer_groupsreg_dec_producerreg_inc_consumerNextern_libsr   cluster_dimsFdebugTsanitize_overflowr#   )fp8e5supported_fp8_dtypesr   deprecated_fp8_dtypesieeedefault_dot_input_precision)r:   allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namenoneinstruction_sched_variantc                 x   t        t              j                  dz  }| j                  i nt	        | j                        }d| j
                  v sd| j
                  v sd| j
                  v rdnd}t        j                  | d|       | j
                  dk(  rd	n| j                  }t        j                  | d
|       ddg}|D ]  }t        || dz        ||<    t        j                  | dt        |j                                      | j                  dkD  r| j                  | j                  d	z
  z  dk(  sJ d       y )Nlibgfx10gfx11gfx12    @   	warp_sizegfx950r   r@   ocmlocklz.bcr3   r   znum_warps must be a power of 2)r   __file__parentr3   dictr#   object__setattr__r@   strtupleitemsr*   )selfdefault_libdirr3   rN   r@   libsrH   s          r   __post_init__zHIPOptions.__post_init__G   s'   h..6 ,,4b$t?O?O:P!TYY.'TYY2F'UYU^U^J^Bdf	4i8YY(*

4%0 	AC">se3K#?@K	A4k6G6G6I0JK~~!t~~!9K'LQR&R 	0/	0R&Rr   c           	          dj                  | j                  j                         D cg c]  \  }}| d|  c}}      }t        j                  |j                  d            j                         S c c}}w )N_-utf-8)join__dict__rY   hashlibsha256encode	hexdigest)rZ   namevalkeys       r   hashzHIPOptions.hashW   s]    hh9L9L9NOID#4&#OP~~cjj12<<>> Ps   A4
)%__name__
__module____qualname__r*   int__annotations__r+   r-   r.   r/   r0   r1   r2   r3   rT   r4   rX   r5   boolr6   r#   rW   r8   r   r9   r;   r<   r=   r>   r?   r@   rA   rB   rD   rF   r]   rk   r   r   r   r(   r(      s"   IsL#JHc!"3"  ccK#L%#E4"t"D#'2%*2(*5:*'--/9 %*9!d!$)T) !#!E3N$$)*!3*L#& &,s+0 ?r   r(   c                   n    e Zd Zedefd       Zdeddf fdZdefdZd Z	d Z
deeef   fd	Zd
 Ze ej"                         d               Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zd Z ej"                         d        Z xZS )
HIPBackendr   c                      | j                   dk(  S )NrC   )backendr   s    r   supports_targetzHIPBackend.supports_target^   s    ~~&&r   returnNc                 j    t         |   |       t        |j                  t              sJ d| _        y )Nhsaco)super__init__
isinstancer#   rW   
binary_ext)rZ   r   	__class__s     r   r{   zHIPBackend.__init__b   s+     &++s+++!r   c                    dt        j                  d| j                  j                        i}| j                  j                  dv rBt	        t
        j                        }|j                  dh       t        t        |            |d<   d|vrt	        t
        j                        }| j                  j                  dv r|j                  h d       n+| j                  j                  dv r|j                  d	d
h       t        t        |            |d<   d|vrt        j                  dd      dk(  |d<   |j                  t
        j                  j                         D ci c]  }||v s||   |||    c}       t        di |S c c}w )Nr#   TRITON_OVERRIDE_ARCH)gfx940gfx941r   tf32r<   r8   >   fp8e4b8fp8e4nvfp8e5b16rO   r   r7   r=   TRITON_DEFAULT_FP_FUSIONr   r   )r!   r"   r   r#   setr(   r<   updaterX   sortedr8   __dataclass_fields__keys)rZ   optsargsr<   r8   ks         r   parse_optionszHIPBackend.parse_optionsg   s`   		"8$++:J:JKL ;;==+.z/V/V+W((//938@\9]3^D/0!-#&z'F'F#G {{#AA$++,NO!!h/$++Y,@A+08L1M+ND'(T)')yy1KS'QUX'XD#$)H)H)M)M)OuASTX\S\aefgahatQQZuv!D!! vs   	F'F-Fc                     |j                   |j                  |j                  |j                  d   |j                  d   |j                  d   fS )Nr   r   r,   )r*   r.   sharedr4   )rZ   metadatas     r   pack_metadatazHIPBackend.pack_metadata}   sO    OO!!!$!!!$!!!$
 	
r   c                 4    dt        | j                        i}|S )Nr   )r   r   )rZ   optionscodegen_fnss      r   get_codegen_implementationz%HIPBackend.get_codegen_implementation   s    %|DKK'@Ar   c                     ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )rZ   r   s     r   get_module_mapzHIPBackend.get_module_map   s    719==r   c                 .    t        j                  |       y N)r   load_dialects)rZ   ctxs     r   r   zHIPBackend.load_dialects   s    #r   c                  H    t         j                  j                  dd      dk(  S )NAMDGCN_USE_BUFFER_OPSr   r   )r!   environgetr   r   r   use_buffer_opszHIPBackend.use_buffer_ops   s     zz~~5s;sBBr   c                     dd l }d}t        | d      r| j                         |k  S t        | |j                        r-t        | d      r!| j                         j                         |k  S y)Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   r|   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbzHIPBackend.is_within_2gb   s]    
3$==?j00c5<<(WS:K-L&&(--/:==r   c                 H    t        j                  |       }d| v r|ddggz  }|S )NSztt.pointer_rangerL   )r   
parse_attr)descrets     r   r   zHIPBackend.parse_attr   s1    $$T*$;',--C
r   c                     t        j                  | |fi |}t        j                         r|dk(  rt        j	                  |       r|dz  }|S )Ntensorr   )r   get_arg_specializationrs   r   r   )r   tykwargsr   s       r   r   z!HIPBackend.get_arg_specialization   sI    00bCFC $$&2>j>V>VWZ>[3JC
r   c                  P   t        j                  d      } | t        |       }|j                         r|S t        t              j
                  dz  }|j                         r|S t        d      }|j                         r|S t        d      }|j                         r|S t        d      )NTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r!   r"   r   is_filerR   rS   	Exception)lld_env_pathllds     r   path_to_rocm_lldzHIPBackend.path_to_rocm_lld   s     yy!67#|$C{{}
8n##&77;;=J./;;=J$%;;=Jqrrr   c                    t        j                  | j                        }|j                          t        j
                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j
                  j                  |       t        j
                  j                  |       t        j                  j                  |       |j!                  |        | S r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   	make_ttirzHIPBackend.make_ttir   s    __S[[)
!!"%..r2''+#))"-b!r"$$R(##B'
s
r   c                 .
   t        j                  | j                        }|j                          t        j
                  j                  |d|j                   |j                  |j                  |j                         |j                  |        t        j                  | j                        }|j                          t        j                  j                  |       t        j                  j                  |       t        j                  j                  |       t         j                  j                  j#                  ||j                  |j$                  |j&                         t        j                  j                  |       t         j                  j                  j)                  |       t        j                  j+                  |d       t         j                  j                  j-                  |       t/        t1        j2                  dd            }t/        t1        j2                  dd            }|j4                  dk(  rdx}}t!        j6                  |j                        rk|j8                  dk7  sJ d	       t         j                  j                  j;                  ||j8                  ||       t        j<                  j?                  |       |j4                  jA                         d
k7  r4t         j                  j                  jC                  ||j4                         t        j                  j+                  |d       t        j                  j                  |       t        j                  jE                  |       t!        j6                  |j                        rxt         j                  j                  jG                  |       tI        |j                        }|r8|j8                  dk(  r)t         j                  j                  jK                  |       tL        jO                         r|t         j                  j                  jQ                  |       t        j<                  j?                  |       t         j                  j                  jS                  ||j                         t        j<                  j?                  |       t        j<                  jU                  |       t        j<                  jW                  |       |j                  |        | S )Nzhip:TTRITON_HIP_GLOBAL_PREFETCHr   TRITON_HIP_LOCAL_PREFETCHzlocal-prefetchr   r   zTriton AMD backend pipeliner has been updated. We used to trigger software pipelining with num_stages == 0. Now it will not happen anymore; please update to use num_stages == 2 for equivalent behavior in the past.rE   r,   ),r   r   r   r   r   r   add_convert_to_ttgpuirr#   r*   rN   r.   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr   add_accelerate_matmulr?   r@   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsro   r!   r"   rF   has_matrix_core_featurer-   add_stream_pipeliner   r   lowerinsert_instruction_sched_hintsadd_reduce_data_duplicationadd_reorder_instructionsr%   add_block_pingpongrs   r   add_canonicalize_pointersadd_convert_to_buffer_opsr   r   )r   r   r   r   global_prefetchlocal_prefetchuse_block_pingpongs          r   
make_ttgirzHIPBackend.make_ttgir   sZ   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s__S[[)
##B'44R833B7

00W\\7C_C_ahanano44R8

00400T:

77;bii(DcJKRYY'BCHI ,,0@@/00On&&w||4%%* Q .P Q*
 JJ222w7I7I?\jkMM++B/,,224>JJ==b'BcBcd00T:44R82226&&w||4JJ77;!4W\\!B!g&8&8A&=

""55b9$$&JJ88<MM++B/JJ88W\\J''+b!$$R(
s
r   c                 p   | }t        j                  |j                        }|j                          t        j
                  j                  j                  ||j                         d}t        j
                  j                  j                  ||j                  |       t
        j                  j                  |       t
        j                  j                  |       t
        j                  j                  |       d}t        j
                  j                  j                  ||j                  |       t
        j                  j!                  |       t
        j                  j#                  |       t
        j                  j%                  |       t
        j                  j'                  |       t
        j                  j!                  |       t
        j                  j#                  |       t
        j                  j)                  |       |j*                  j-                         dk7  r?t        j
                  j                  j/                  ||j                  |j0                         t2        j4                  j7                  dd      dk(  rt
        j8                  j;                  |       t        j
                  j                  j=                  ||       |j?                  |       tA        jB                          tA        j                         }tA        jD                  ||      }t	        jF                  |       d}	t2        j4                  j7                  dd      dk(  rd	}	tA        jH                  |t        jJ                  |j                  |	       t	        jL                  ||j                         t	        jN                  |d
       t	        jP                  |dd       t	        jP                  |dd       t	        jP                  |dd       t	        jP                  |d|jR                  dk(         |jU                         D 
cg c]  }
|
jW                         r|
 }}
|d   jY                  t        jZ                         |d   j]                  dd|j^                  |jR                  z          |d   j]                  d|j`                          |jb                  rdnd}|d   j]                  d|       t2        j4                  j7                  dd      dk(  r'|d   je                  d	       |d   jg                          t	        jh                  |d          t2        j4                  j7                  dd      dk(  r\tk        tl              jn                  dz  }tq        |dz        tq        |dz        tq        |dz        g}tA        jr                  ||       nW|jt                  rK|jt                  D cg c]  \  }}t	        jv                  ||      s|  }}}tA        jr                  ||       tA        jx                  |t@        jz                  |j                  dg |j|                         | j                  d      |d<   t	        j                  |       t	        j                  |       tq        |      S c c}
w c c}}w )Nr   TrE   TRITON_DISABLE_LINE_INFOr    TRITON_ENABLE_ASANr   +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rM   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr:   zdenormal-fp-math-f32rH   z
asanrtl.bczocml.bczockl.bcz
ttg.sharedr   )Br   r   r   r   r   r   r   %add_decompose_unsupported_conversionsr#   add_optimize_lds_usageconvertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rF   r   lower_instruction_sched_hintsr-   r!   r   r   llvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleattach_target_tripleattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrN   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr*   r+   rA   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   rR   rS   rW   link_extern_libsr3   need_extern_liboptimize_moduleOPTIMIZE_O3r=   get_int_attrcleanup_bitcode_metadatadisable_print_inline)srcr   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   llvm_modtarget_featuresfnfnsdenormal_moder[   pathsrh   paths                    r   	make_llirzHIPBackend.make_llir  s   __S[[)


@@W\\R 

11"gllOT$$R(**2.11"5 	

((W\\9E''+b!''+**2.''+b!$$R(,,224>JJ<<RwOaOab::>>4c:cAMM&&r*

55b)D
s 	,,.>>#w/  *::>>.4;&Ox):):GLL/Z 	Hgll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224PbB<M<M<OrPPA > >?A8Bw?P?PQXQbQb?b>c:de 	A0W5I5I4JL+2+E+E6A1=A::>>.4;F((2F##%
 	  Q(::>>.4;!(^22U:NN\12NY./NY./E
 !!(E2  .5.A.AiltTSEXEXYacgEhTiEi!!(E2Xt'7'7r2wOgOgh !--l;$$X. 	  *8}Y Q@ js   >Z-Z-,Z2Z2c           	      N   t        j                  d|       }t        |      dk(  sJ |d   |d<   t        j                  | t
        j                  |j                  dg |j                  d      }t        j                  j                  dd      d	k(  rt        d
       t        |       |S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rh   r   FAMDGCN_ENABLE_DUMPr   r   z!// -----// AMDGCN Dump //----- //)refindalllenr   translate_to_asmr   r  r#   r=   r!   r   r   print)r  r   r   namesamdgcns        r   make_amdgcnzHIPBackend.make_amdgcny  s    
 

QSVW5zQ 8&&sC,=,=w||RQSU\UmUmotu::>>.4;56&Mr   c                    d}t         j                  j                  dd      dk(  rd}t        j                  | |j
                  |      }t        j                         }t        j                         5 }t        j                         5 }t        |j                  d      5 }|j                  |       d d d        t        j                  |ddd	|j                  d
|j                  g       d d d        t        |j                  d      5 }	|	j                         }
d d d        d d d        
S # 1 sw Y   zxY w# 1 sw Y   NxY w# 1 sw Y   +xY w# 1 sw Y   
S xY w)Nr   r   r   r   r   wbz-flavorgnuz-sharedz-orb)r!   r   r   r   assemble_amdgcnr#   rs   r   tempfileNamedTemporaryFileopenrh   write
subprocess
check_callread)r  r   r   r  ry   	rocm_pathtmp_outtmp_infd_infd_outr   s              r   
make_hsacozHIPBackend.make_hsaco  s)   ::>>.4;&O##CG//1	((* 	$g,,. q&&++t, 'KK&'%%y)UIv{{\`bibnbn&opq gllD) $Vkkm$	$ 
' 'q q$ $	$ 
sT   1ED4D(/8D4'EE E(D1-D44D=	9E E		EEc                 b      fd|d<    fd|d<    fd|d<    fd|d<    fd	|d
<   y )Nc                 *    j                  | |      S r   )r   r  r   r   rZ   s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>      t~~c8W/U r   r   c                 *    j                  | |      S r   )r   rC  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>      Xw0W r   ttgirc                 *    j                  | |      S r   )r$  rC  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  rD  r   llirc                 *    j                  | |      S r   )r.  rC  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    1A1A#xQX1Y r   r-  c                 *    j                  | |      S r   )r@  rC  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  rF  r   ry   r   )rZ   stagesr   s   ` `r   
add_stageszHIPBackend.add_stages  s1    UvWwUvYxWwr   c                 z    t        j                  t        j                         dgd      }| d| j                   S )Nz	--versionra   )encodingr`   )r8  check_outputrs   r   r   )rZ   versions     r   rk   zHIPBackend.hash  s8    )):+F+F+H+*Vahi!DKK=))r   )rl   rm   rn   staticmethodr   rv   r{   r
   r   r   r   r   rW   r   r   r   	functools	lru_cacher   r   r   r   r   r   r   r$  r.  r@  rM  rk   __classcell__)r~   s   @r   rs   rs   \   sg   '	 ' '"y "T "
"S ",
>S*_ 5 >
 YC  C       s s&   3 3j i iV     X Y* *r   rs   )triton.backends.compilerr   r   triton._C.libtritonr   r   r   r   dataclassesr	   typingr
   r   r   typesr   rd   r4  r!   r'  r8  rS  pathlibr   r   r%   r(   rs   r   r   r   <module>r\     sn    ; 5 5 ! # #    	 	   . .
F
 $?? ?? ??DG* G*r   