
    VhT                     
   d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZmZmZ  ej@                  e!      Z"d Z#d Z$ddd fde%de%de%dee&e%e%e%e%e%f      fdZ'ejP                  dk7  rNddddddddddddddddddd ddd!ddd"ddd#ddd$ddd%ddd&ddd'ddd(ddd)ddd*ddd+ddd,ddgnE ejR                  g d-d./      D  cg c]  \  } }}d0D ]  }d1D ]  }| ||||fdd    c}}}}} Z*d2ddd3ddd4ddd5ddd6ddd7ddd8ddd9ddd:ddd;ddg
Z+d ddd#ddd'ddd$ddd(ddddddddd)ddd!ddd<ddd=ddgZ,d>ddd?ddgZ-ejP                  dk7  re*e-z   ne*Z.d@ddd:dddAdddBdddCddgZ/g dDdddEdddFdddGddd9dddHdddIdddJdddKddd=dddLdddMdddNdddOdddPdddQdddRdddSdddTdddUdddVdddWdddXdddYdddZddd[ddd\ddd]ddd^ddd_ddd`dddadddbdddcdddddddedddfdddgdddhdddidddjdddkdddldddmdddndddodddpdddqdddrddd3dddsdddtdddudddvdddwdddxdddydddzddd{ddd|ddd}ddd~dddddddddddd4dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddZ0d:dddAddddddNdddBddddddddddddCddg	Z1 e&d e*D              Z2 e&d e+D              Z3 e&d e,D              Z4 e&d e.D              Z5 e&d e/D              Z6 e&d e0D              Z7 e&d e1D              Z8e	jr                  jt                  rBe	jv                  jy                         r( e$e2      Z2 e$e3      Z3 e$e4      Z4 e$e5      Z5 e$e7      Z7 e jz                  e'e2      Z> e jz                  e'e3      Z? e jz                  e'e4      Z@ e jz                  e'e6      ZA e jz                  e'e7      ZB e jz                  e'e8      ZCdeDe   deEfdZFed        ZGede%de%deHeIef   fd       ZJd ZKd ZLd ZMddddddZNd ZOdede&eEeEf   fdZPyc c}}}}} w )    N)Sequence)Anycast)realize_inputsSymbolicGridFn)V)
OrderedSet   config)PythonWrapperCodegen)ChoiceCallerLayout)next_power_of_2)get_backend_num_stagesget_num_smsTMA_DESCRIPTOR_SIZEuse_aten_gemm_kernelsc                 $    ddl m}  ||| |      S )Nr   )Config)
num_stages	num_warps)tritonr   )r   r   kwargsr   s       P/home/dcms/DCMS/lib/python3.12/site-packages/torch/_inductor/kernel/mm_common.pytriton_configr      s    &Z9EE    c                 @    t               t        fd| D              S )Nc              3   D   K   | ]  }|d    |d   |d   |d   f  yw)r      r
      N ).0crocm_num_stagess     r   	<genexpr>z*build_rocm_gemm_configs.<locals>.<genexpr>&   s,     Lq!A$!adOQqT:Ls    )r   tuple)configsr%   s    @r   build_rocm_gemm_configsr)   $   s    ,.OLGLLLr   Fr    c                      y)NFr"   )mnks      r   <lambda>r.   0   s    r   r+   r,   r-   r(   c              #     K   ddl m} |j                  j                  }d}	|rdnd}
t	        t        t        j                  j                  j                  | t        j                  j                  j                              |	      } t	        t        t        j                  j                  j                  |t        j                  j                  j                              |	      }t	        t        t        j                  j                  j                  |t        j                  j                  j                              |
      }t        t        t        df             }|D ]W  \  }}}}}t	        t!        t        ||z        |       |	      }t	        t!        t        ||z        |      |	      }t	        t!        t        ||z        |      |
      } ||||      r{t!        |||z  dz        }t        j"                  j$                  rjd}d	D ]a  }|dk7  r||z  dk7  s||z  dk7  r|||||||f|vs%|t'        |      |k  s6|j)                  |||||||f       t+        |||||||       c |||||df|vs|t'        |      |k  s/|j)                  |||||df       t+        |||||       Z y
w)z
    Heuristic to shrink configs when they are bigger than the input size

    :param scale: scale factor applied to the config values
    :param exclude: whether a given config should be excluded
    r   r          )fallback.   r
   )r   r0   N)BLOCK_MBLOCK_NBLOCK_Kr   r   matrix_instr_nonkdimkpackr4   r5   r6   r   r   )torch._inductorr   test_configsmax_mm_configsmaxr   r   graphsizevars	size_hinttorch	_inductorunbacked_symint_fallbackr	   r'   intminversionhiplenaddr   )r+   r,   r-   r(   has_int8_tensorscaleexcluder   r<   min_block_sizemin_block_size_kusedblock_mblock_nblock_kr   r   r8   r7   s                      r   filtered_configsrS   )   s     '((77NN -r"GG&&//HH ' 	
 		A 	GG&&//HH ' 	
 		A 	GG&&//HH ' 	
 		A eCHo&(D<C ?8':yc#go.2NCc#go.2NCc#go.24DE7GW- 	7W#4#;<	==E(/ &$'1,22a7!55:  (  #*c$i..HHH###&%0!
 ( ' ' '#-"+-A# =&P ':y!DDP&#d)n*D'7GZANO####)' s?s   H0J?3J?:J? J?-J?
EXHAUSTIVE)r1   r1   r0   r    r
   T)r   cond)r1   r1      r
   r!   )r1   @   r1         )rW   r1   r1   rX   rY   )rW   r1   rV   rX   r!   )rW   rW   r0   r
   r!   )rW   rW   r1   r
   r!   )rW   rW   rW      rY   )rW   rW   rV   rX   r!   )rW   rV   r1   rZ   r!   )rW   rV   r1   r!   rY   )rW   rV   rW   rZ   r!   )rW   rV   rV   r!   r!   )rV   rW   r1   rZ   r!   )rV   rW   r1   r!   rY   )rV   rV   r1   r
   rY   )rV   rV   r1   rZ   r!   )rV   rV   rW   rZ   r!   )rV   rV   rW   rX   rY   )r0   r1   rW   rV   r3   rZ   )repeat)r    r
   rZ   r!   rX   )r
   r!   rY   )r0   r1   r0   rZ   r
   )r0   r1   r1   r!   r
   )r0   r1   r1   rX   r
   )rW   rW   rV   rZ   r!   )rV   rW   r1   r
   r
   )rV   rW   rW   rZ   rY   )rV   rW   rV   r!   rY   )rV   rV   r1   r!   r!   )rV   rV   rW   rZ   rY   )rV   rV   rW   rX   r!   )rV   r3   rV   rZ   rY   )r3   rV   rV   rZ   rY   )r0   rV   r3   rZ   r!   )r0   rV   r3   rX   rY   )rV   r3   rW   rZ   rY   )rV   rV   rV   rZ   rY   )rV   rV   rV   rZ   r!   )rV   rV   rW   r!   rY   )rV   r3   r1   rZ   rY   )r3   rV   r1   rZ   rY   )r3   rW   r1   r!   r!   )rW   r3   r1   r!   r!   )rV   rW   r1   r!   r!   )rW   rV   r1   r!   r!   )rV   r1   r1   r!   r!   )rW   r1   r1   rX   r
   )r3   rW   rV   r!   r!   )rW   r3   rV   r!   r!   )rV   rV   rV   r!   r!   )rV   rW   rW   r!   r!   )rW   rV   rW   r!   r!   )rV   r1   rW   r!   r!   )rW   r1   rW   rX   r
   )r0   r1   r1   r
   r
   )r0   rW   r1   r
   r
   )r0   rV   r1   r
   r!   )r0   r3   r1   r
   r!   )r0   r1   rW   r
   r
   )r0   rW   rW   r
   r
   )r0   rV   rW   r
   r!   )r0   r3   rW   r
   r!   )r1   r1   r1   r
   r
   )r1   rW   r1   r
   r
   )r1   rV   r1   r
   r!   )r1   r3   r1   r
   r!   )r1   r1   rW   r
   r
   )r1   rW   rW   r
   r
   )r1   rV   rW   r
   r!   )r1   r3   rW   r
   r!   )r0   r1   r1   rZ   r
   )r0   rW   r1   rZ   r
   )r0   rV   r1   rZ   r!   )r0   r3   r1   rZ   r!   )r0   r1   rW   rZ   r
   )r0   rW   rW   rZ   r
   )r0   rV   rW   rZ   r!   )r0   r3   rW   rZ   r!   )r1   r1   r1   rZ   r
   )r1   rW   r1   rZ   r
   )r1   rV   r1   rZ   r!   )r1   r3   r1   rZ   r!   )r1   r1   rW   rZ   r
   )r1   rW   rW   rZ   r
   )r1   rV   rW   rZ   r!   )r1   r3   rW   rZ   r!   )r0   rW   r1   r!   r
   )r0   rV   r1   r!   r!   )r0   r3   r1   r!   r!   )r0   r1   rW   r!   r
   )r0   rW   rW   r!   r
   )r0   rV   rW   r!   r!   )r0   r3   rW   r!   r!   )r1   r1   r1   r!   r
   )r1   rW   r1   r!   r
   )r1   rV   r1   r!   r!   )r1   r3   r1   r!   r!   )r1   r1   rW   r!   r
   )r1   rW   rW   r!   r
   )r1   rV   rW   r!   r!   )r1   r3   rW   r!   r!   )r0   rW   r1   rX   r
   )r0   rV   r1   rX   r!   )r0   r3   r1   rX   r!   )r0   r1   rW   rX   r
   )r0   rW   rW   rX   r
   )r0   rV   rW   rX   r!   )r0   r3   rW   rX   r!   )r1   r1   r1   rX   r
   )r1   rW   r1   rX   r
   )r1   rV   r1   rX   r!   )r1   r3   r1   rX   r!   )r1   r1   rW   rX   r
   )r1   rW   rW   rX   r
   )r1   rV   rW   rX   r!   )r1   r3   rW   rX   r!   )r0   r1   r1      r
   )r0   rW   r1   r\   r
   )r0   rV   r1   r\   r!   )r0   r3   r1   r\   r!   )r0   r1   rW   r\   r
   )r0   rW   rW   r\   r
   )r0   rV   rW   r\   r!   )r0   r3   rW   r\   r!   )r1   r1   r1   r\   r
   )r1   rW   r1   r\   r
   )r1   rV   r1   r\   r!   )r1   r3   r1   r\   r!   )r1   r1   rW   r\   r
   )r1   rW   rW   r\   r
   )r1   rV   rW   r\   r!   )r1   r3   rW   r\   r!   )rV   rV   rV   r!   rY   )rV   rV   rV   rX   r!   )rV   rV   rV   rX   rY   )rV   rV   rV   r\   rY   c           	   #      K   | ]8  }|d    r1t        t        t        t        t        t        t        f   |d          : ywrU   r   Nr   r'   rD   r#   r   s     r   r&   r&   k  s;      f~ 	sCc3&	')9:   >A c           	   #      K   | ]8  }|d    r1t        t        t        t        t        t        t        f   |d          : ywr^   r_   r`   s     r   r&   r&   p  ;      "f~ 	sCc3&	')9:"ra   c           	   #      K   | ]8  }|d    r1t        t        t        t        t        t        t        f   |d          : ywr^   r_   r`   s     r   r&   r&   u  s;      f~ 	sCc3&	')9:ra   c           	   #      K   | ]8  }|d    r1t        t        t        t        t        t        t        f   |d          : ywr^   r_   r`   s     r   r&   r&   z  rc   ra   c           	   #      K   | ]8  }|d    r1t        t        t        t        t        t        t        f   |d          : ywr^   r_   r`   s     r   r&   r&     s;      'f~ 	sCc3&	')9:'ra   c           	   #      K   | ]8  }|d    r1t        t        t        t        t        t        t        f   |d          : ywr^   r_   r`   s     r   r&   r&     s;      #f~ 	sCc3&	')9:#ra   c           	   #      K   | ]8  }|d    r1t        t        t        t        t        t        t        f   |d          : ywr^   r_   r`   s     r   r&   r&     s;      .f~ 	sCc3&	')9:.ra   )r(   choicesreturnc                     t        |       dk(  rFt               s<t        j                  rt        j                  d       yt        j                  d       yy)Nr   zNo choices for GEMM, using ATen backend as fallback. This behavior is being deprecated. Please add include Aten in max_autotune_gemm_backends.Ta  No choices for GEMM, chose not to fallback to ATen backend. To temporarily change this behavior, set autotune_fallback_to_aten to True via TORCHINDUCTOR_AUTOTUNE_FALLBACK_TO_ATEN=1, but this knob is being deprecated. The long term fix is to include Aten in max_autotune_gemm_backends.F)rH   r   inductor_configautotune_fallback_to_atenlogwarning)ri   s    r   should_fallback_to_atenrp     sN    
7|q!6!844KKl KKV r   c                :     || |d          |||d         z  ddfS )z9
    The CUDA grid size for matmul triton templates.
    r4   r5   r    r"   )r+   r,   metacdivs       r   mm_gridrt     s,    
 DO$tAtI'??AFFr   MNrr   c          	      N     ||d    || |d          |||d         z        ddfS )z(Defines the grid for persistent kernels.NUM_SMSr4   r5   r    r"   )ru   rv   rr   rs   rE   s        r   persistent_mm_gridry     s=     	DOT!T)_5QY8PPQ		 r   c                 r    | t         j                  t         j                  fv ryd|  j                  dd      S )Nz
tl.float32ztl.ztorch. )rA   float16bfloat16replace)dtypes    r   acc_typer     s3    //=  2..r   c           
         t        j                  || j                  d         | j                  d   k(  }t        j                  j
                  j                  j                  xr/ t        j                   xs |dz  dk(  xr |dz  dk(  xr |dz  dk(  }t        dd||t        |j                        | j                  | j                  d| j                  S )z4
    Common options to matmul triton templates.
    r6   r0   r   rY   )GROUP_MEVEN_K
ALLOW_TF32ACC_TYPEr   r   r"   )sympygcdr   rA   backendscudamatmul
allow_tf32rl   force_same_precisiondictr   r   r   r   )r   sym_msym_nsym_klayouteven_k_symbolicr   s          r   
mm_optionsr     s     			%y12fmmI6NN  $$++66 000 	JRZAH52:!"3Hq8H   &,,'$$"" -- r   c                     t        | j                  j                          |j                  j                          t               t              S )N)A_ROW_MAJORB_ROW_MAJORrx   TMA_SIZE)r   r   is_transposedr   r   )mat1mat2s     r   persistent_mm_optionsr     s=    11331133$	 r   )r   	out_dtypeuse_4x2_dimmat2_transposedc          	         t        | |      \  } }| j                         ^ }}}	|r|j                         ^ }
}}n|j                         ^ }
}}t        ||
      D cg c]/  \  }}t        j                  j
                  j                  ||      1 c}}}|r|dz  }t        j                  j
                  j                  |	|      }|7ddlm} || j                         } || j                         |g |||      }n	|J d       ddlm} |D cg c]  }t         |||j                                }}||||| |g|S c c}}w c c}w )z4
    Common arg processing for mm,bmm,addmm,etc
    r
   r   )FixedLayoutz,out_dtype is ignored if layout is specified.)expand)r   get_sizezipr   r>   r?   guard_equalstorch._inductor.irr   	get_dtype
get_deviceloweringr   size)r   r   r   r   r   r   othersb1r+   k1b2r,   k2abr-   r   r   xs                      r   mm_argsr     sA     d+JD$JRB]]_
Q]]_
R9<REA			&	&q!	,EA!V	%%b"-A~2(IOOaJJAJ
  P"PP !>DEnVAv{{34EFEq!VT41&11+ 	F& Fs   4E#Ec                       fd}|S )Nc                 P   dk7  r>t         j                  j                  | t         j                  j                              } dk7  r>t         j                  j                  |t         j                  j                              }t         j                  j	                  | |      S )Nr    )r   opsmulconstantrI   )accbiasalphabetar   s     r   epiloguez addmm_epilogue.<locals>.epilogue,  sl    A:%%))Cu!=>C195599T155>>$#>?Duuyyd##r   r"   )r   r   r   r   s   ``` r   addmm_epiloguer   +  s    $ Or   r   c                     d}t        j                  | j                        }|;d}| j                  D ]&  }t        j                  |      }||dk(  s!d} d|fS  d|fS d}|D ]  }||z  }	 |dkD  }||fS )a^  
    Check if input tensors and output layout have static shapes and non-zero sizes.

    Args:
        layout: Output layout object with a 'size' attribute.

    Returns:
        Tuple[bool, bool]: (is_static, is_nonzero)
            is_static: True if all shapes are statically known
            is_nonzero: True if all dimensions are non-zero
    Tr   Fr    )r   %statically_known_list_of_ints_or_noner   statically_known_int_or_none)r   static_shapestatic_sizenonzerossznumeldims           r   _is_static_problemr   6  s     L&LLK  	A%BB1EB~"'g~	
 g~E aiG  r   )Q	functools	itertoolsloggingcollections.abcr   typingr   r   r   rA    torch._inductor.select_algorithmr   r   torch._inductor.virtualizedr   torch.utils._ordered_setr	   r{   r   rl   codegen.wrapperr   irr   r   runtime.runtime_utilsr   utilsr   r   r   r   	getLogger__name__rn   r   r)   rD   r'   rS   max_autotune_gemm_search_spaceproductmm_kernel_configsextra_mm_kernel_configsint8_mm_kernel_configsmixed_mm_kernel_configs_small_mmixed_mm_kernel_configspersistent_mm_kernel_configsscaled_mm_kernel_configs#scaled_persistent_mm_kernel_configsmm_platform_configsextra_mm_platform_configsint8_platform_configsmixed_mm_platform_configspersistent_mm_platform_configsscaled_mm_platform_configs%scaled_persistent_mm_platform_configsrF   rG   r   is_availablepartial
mm_configsextra_mm_configsint8_mm_configspersistent_mm_configsscaled_mm_configsscaled_persistent_mm_configslistboolrp   rt   r   strry   r   r   r   r   r   r   r9   s   00000r   <module>r      s>      $    K ) / ( 2 % 3  g!FM 
!r
r
r r eCc3345	r\ 55E) &t4&5%t4%t4&5%t4%t4%t4&5&5&5&5'6&5&5'6'6'6'6'0 *;):):"1*

 
%GWg *

 "
  Wgz9EtT
T
T
/ N "40!40!40"D1"D1"D1#T2#T2#T2#T2  "40"D1"D1"D1"D1!40!40#T2!40 %d3$d3 & $T2#T2#  55E 77	  $T2#T2$d3$d3#T2  b#T2b#T2b #D1b #D1	b
 $T2b #D1b #D1b #D1b "40b %d3b $T2b $T2b %d3b #D1b #D1b  #D1!b" "40#b$ "40%b& "40'b( #D1)b* #D1+b, "40-b. "40/b0 #D11b2 #D13b4 "405b6 "407b8 #D19b: #D1;b< "40=b> "40?b@ #D1AbB #D1CbD "40EbF "40GbH #D1IbJ #D1KbL "40MbN "40ObP #D1QbR #D1SbT "40UbV "40WbX #D1YbZ #D1[b\ "40]b^ "40_b` #D1abb #D1cbd "40ebf "40gbh #D1ibj #D1kbl "40mbn "40obp #D1qbr #D1sbt "40ubv "40wbx #D1ybz #D1{b| "40}b~ "40b@ #D1AbB #D1CbD "40EbF "40GbH #D1IbJ #D1KbL "40MbN "40ObP #D1QbR #D1SbT "40UbV "40WbX #D1YbZ #D1[b\ "40]b^ "40_b` #D1abb #D1cbd "40ebf "40gbh #D1ibj #D1kbl "40mbn "40obp #D1qbr #D1sbt "40ubv "40wbx #D1ybz #D1{b| "40}b~ "40b@ #D1AbB #D1Cb J $T2$d3$d3$d3$d3$d3$d3$d3#T2
' #  #  
 " ")"  
  (  
 " ")"  
 "' '.' " 
 # #*#  
 ). .5. ) % 	==00212EF 78Q R34IJ 78Q R!89S!TY

 %9$$% 
 $)##!
 *	))* 
 &I%%& 
  1y001  T,%7 D & G G # # T#s(^  /. '2T!v !%d
*; !}
s   
#U