
    Vh                     t   d dl mZmZ d dlZd dlZddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZ dd	lmZmZmZmZmZmZmZ d
efdZdedee   defdZd Zdddee   dee   dee   deee      deeef   f
dZdefdZdede fdZ!dddee   deee      de"edf   fdZ#dddee   deee      defdZ$y)    )AnyOptionalN   )config)AttrsDescriptorWrapper)_type_ofexpr_fits_within_32bittriton_version_uses_attrs_dict)V   )ArgNameConstexprArgKernelArgTypeSizeArg	TensorArgTMADescriptorArgWorkspaceArgnamec                     t         j                  j                  |       rIt         j                  j                         j                  dk7  ry| t         j                  j
                  vryy)NcpuTF)r   graphis_unspec_argget_current_device_or_throwtypemutated_buffers)r   s    T/home/dcms/DCMS/lib/python3.12/site-packages/torch/_inductor/codegen/triton_utils.pyshould_unwrap_unspec_argr      sJ    wwT"77..055>qww...    arg
size_dtypereturnc                f   t        | t              r| j                  t        j                  k(  rd}nu| j                  t        j
                  k(  rd}nU| j                  t        j                  k(  rd}n5| j                  t        j                  k(  rd}nt        | j                        }t        | j                        r|j                  d      }|dv ry|S |S t        | t              r| j                  t               ryy	t        |       rt               ryt        | j                  t         t"        j$                  f      ry|d
k(  ry|dk(  ry|xt        j&                  t        j(                        j*                  }t-        | j                        r5t.        j0                  j2                  j5                  | j                  |       yyt7        d|       t        | t8              rt        | j                        S t        | t:              ryt        | t<              ryt7        dt?        |        d|        )Nz*fp8e4nvz*fp8e5z*fp8e4b8z	*fp8e5b16*)fp16bf16fp32	constexprz*i8ztl.int32i32ztl.int64i64zunhandled size_dtype 	nvTmaDesc
unhandled : ) 
isinstancer   dtypetorchfloat8_e4m3fnfloat8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzr   r   bufferlstripr   exprr
   _arg_equals_1floatsympyFloatiinfoint32maxr	   r   r   sizevars	guard_leqNotImplementedErrorr   r   r   r   )r   r    tyenew_tyeint_maxs        r   signature_ofrD   "   s   #y! 99+++CYY%+++CYY%///CYY%///C399%C#CJJ/jjoG**J#w88-/ #
 3$B$D 5%++"67 #:%kk%++.22G%chh/  **388W=%(=j\&JKK#|$		""#'(#|$

49+Ru=
>>r   c                 \    g }| D ]$  }t        |t              r|j                  |       & |S N)r-   r   append)	signaturenew_signaturer   s      r   non_constexpr_signaturerJ   c   s6    M &#|,  %& r   indicesrH   argdefsrL   c          	          |t        t        t        |                   }t        ||       D ci c]   \  }}||   j                  t        ||      " c}}S c c}}w )N)r    )listrangelenzipr   rD   )rH   r    rM   rL   ir   s         r   signature_to_metarT   l   s]     uS^,- '9-As 	
cjAA  s   %Ac                    | j                   }|t        j                  j                  v r|t        j                  j                  vS |t        j                  j
                  v ryt        j                  j                  r*t        j                  j                  j                  |      }nt        j                  j                  |      }|sN|t        j                  j                  j                  k(  sJ t        j                  j                  j                  }n|j                         }t        |t        j                   j"                  j$                        r|j'                          S y)NF)r4   r   r   graph_inputsaligned_inputs	constants	schedulerget_buffer_layouttry_get_bufferkerneloutput_noder   layout
get_layoutr-   r/   	_inductorirNonOwningLayoutmaybe_guard_aligned)r   buf_namer^   r4   s       r   is_unaligned_bufferre   {   s    zzH177'''qww5555177$$$ww""44X>''1qxx3388888XX))00F&&(F&%//,,<<=--///r   c                     t        | t              xr` t        | j                  t        t        j
                  f      xr4 t        j                  j                  j                  | j                  d      S )Nr   )
r-   r   r6   intr9   Integerr   r   r>   statically_known_equals)r   s    r   r7   r7      sR    3  	Bsxx#u}}!56	BGG44SXXqAr   args.c                |    |t        t        t        |                   }t        d t	        ||       D              }|S )Nc              3   >   K   | ]  \  }}t        |      s|  y wrF   )r7   ).0rS   r   s      r   	<genexpr>z&equal_1_arg_indices.<locals>.<genexpr>   s     PVQ]3=OqPs   )rO   rP   rQ   tuplerR   )rj   rL   
equal_to_1s      r   equal_1_arg_indicesrq      s9    
 uSY'(Ps7D'9PPJr   c                "   |t        t        t        |                   }dt        dt        dt
        dt
        fdt        j                  j                  rt        fdt        ||       D              }nd}t        | |      }t        ||      S )	Nx	alignmentinclude_tensorr!   c                 r   t        | t              r^|r[t        j                  j                  j                  | j                  | j                  j                  z  |      }|xr t        |        S yt        | t              rx| j                  j                  d      ry| j                  yt        | j                  t              ryt        j                  j                  j                  | j                  |      S t        | t              ryt        | t         t"        f      ryt%        dt'        |        d|        )z
        Roughly follow triton code here:
        https://github.com/openai/triton/blob/5282ed890d453e10b9ee30076ef89115dd197761/python/triton/runtime/jit.py#L208-L222
        Fload_seed_offsetTr+   r,   )r-   r   r   r   r>   statically_known_multiple_ofoffsetr.   itemsizere   r   r   
startswithr6   r8   r   r   r   r@   r   )rs   rt   ru   offset_aligneds       r   
is_alignedzconfig_of.<locals>.is_aligned   s    
 a#!"!1!1!N!NHHqww///" &D.A!.D*DDa! vv  !34vv~!&&%(77##@@SSa&a*L9:!JtAwir!"=>>r   c              3   >   K   | ]  \  }} |d d      r|  yw)   T)rt   ru   N )rm   rS   r   r}   s      r   rn   zconfig_of.<locals>.<genexpr>   s)       
3#DA  
s   r   rK   )rO   rP   rQ   r   rg   boolr   tritondivisible_by_16ro   rR   rq   r   )rj   rL   r   rp   r}   s       @r   	config_ofr      s    
 uSY'(?m ? ?T ?d ?> }}$$  
gt, 
 
 $T7;J!/:>>r   )%typingr   r   r9   r/    r   runtime.hintsr   utilsr   r	   r
   virtualizedr   commonr   r   r   r   r   r   r   strr   rD   rJ   rO   rg   dictrT   re   r   r7   ro   rq   r   r   r   r   <module>r      sC        2 T T   3 >?m >?HSM >?c >?B $(M"  ']	
 d3i  
#s(^Y 6}   $(

}

 d3i 
 38_	
  $(2?
}
2? d3i 2? 		2?r   