
    nVhj8                       d dl mZ ddlmZ ddlmZ ddlmZ d<dZd<dZej                  ed	               Z
ej                  e ej                  d
      d                      Zej                  e ej                  d      d=d                     Zej                  ed=d              Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zej                  e ej,                  ddd      d>d                     Zej                  e ej,                  dd      d?d                     Zed         Zed!        Zed"        Zed#        Zej                  e ej,                  d$dd      d>d%                     Zej                  e ej,                  d&d      d?d'                     Zed(        Zd@d*Z ej                  e ej,                  d+d),      dAdBd-                     Z!ed.        Z"ej                  e ej,                  d/      dCd0                     Z#ej                  e ejH                  d1      dDd2                     Z%ed3        Z&ej                  e ejH                  d4      dDd5                     Z'edEd6       Z(edFd7       Z)ej                  edejT                  fdGd8              Z+d9 Z,ej                  edHd:              Z-ed;        Z.y)I    )annotations   )jit   )core)mathc                r    d}| j                   }|dkD  r|dz  }|dz  }|dkD  rt        j                  |      S )Nr   r   valuer   	constexpr)ilog2ns      H/home/dcms/DCMS/lib/python3.12/site-packages/triton/language/standard.py_log2r   
   sC    D	A
a%	a	 a% >>$    c                d    | j                   }t        j                  ||dz
  z  dk(  xr |dk7        S )Nr   r   r
   )r   r   s     r   _is_power_of_twor      s0    	A>>1A;1,7a88r   c                    | |z   dz
  |z  S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :type div: Block
    r    )xdivs     r   cdivr      s     GaKCr   sigmoidc                :    ddt        j                  |        z   z  S )Nr   )r   exp)r   s    r   r   r   +   s     DHHaRL !!r   softmaxc                    | t        | d      z
  }t        j                  |      }t        |d      }t        j                  |||      S )Nr   )maxr   r   sumfdiv)r   ieee_roundingznumdens        r   r   r   2   s>     	
C1IA
((1+C
c1+C99S#}--r   c                H    t        j                  | | j                  g|      S )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    )can_reorder)r   reshapenumel)r   r'   s     r   ravelr*   <   s     <<AGG9+>>r   c                    | |z  |z   }||z  }||z  }||z  }t        j                  ||z
  |      }||z  }|||z  z   }	||z  }
|	|
fS )a  
    Transforms the indices of a row-major `size_i * size_j` matrix into
    the indices of a column-major matrix for each group of `size_g` rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r   jsize_isize_jsize_gijsize_gjgroup_idoff_inew_inew_js              r   	swizzle2dr8   H   sm    , 
VaB voGW}HvE\\&5.&1F	gBBKE&LE%<r   c                0    t        j                  | d|      S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtypes     r   zerosr=   p   s     99UAu%%r   c                B    t        | j                  | j                        S )z
    Returns a tensor of zeros with the same shape and type as a given tensor.

    :param input: input tensor
    :type input: Tensor
    )r=   r;   r<   )inputs    r   
zeros_liker@   }   s     ekk**r   c                    |r| |k(  xr ||k  }nd}| |kD  xs |}t        j                  || |      }t        j                  |||      }||fS NFr   where)	value1index1value2index2tie_break_lefttiegtv_reti_rets	            r   _argmax_combinerN      sY    26F?	&	CBJJr66*EJJr66*E%<r   c                     t        | |||d      S NTrN   rE   rF   rG   rH   s       r   _argmax_combine_tie_break_leftrS          66664@@r   c                     t        | |||d      S rB   rQ   rR   s       r   _argmax_combine_tie_break_fastrV          66665AAr   c                .    t        j                  | |      S N)r   maximumabs     r   _elementwise_maxr^          <<1r   rZ   return_indicesreturn_indices_tie_break_left)return_indices_argtie_break_argNc                z   t        j                  |       } |r<|rt        j                  | |t        |      S t        j                  | |t        |      S t        j
                  | j                  j                        t        j
                  d      k  rt        j
                  | j                  j                               r | j                  t         j                        } n@| j                  j                         sJ d       | j                  t         j                        } t        j                  | |t        |      S N	keep_dims    z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesrS   rV   r   r<   primitive_bitwidthis_floatingtofloat32is_intint32reducer^   r?   axisr`   ra   rg   s        r   r   r      s    
 --e4E(,,UD:Xdmnn,,UD:Xdmnn>>%++889DNN2<NN~~ekk5578.{{))+Q-QQ+,{{5$(8INNr   zmaximum indexrI   )rc   c                ,    t        | |d||      \  }}|S NT)r`   ra   rg   )r   r?   rs   rI   rg   _rets         r   argmaxry      s!     5$tSamvwHQJr   c                    |r| |k(  xr ||k  }nd}| |k  xs |}t        j                  || |      }t        j                  |||      }||fS rB   rC   )	rE   rF   rG   rH   rI   rJ   lt	value_ret	index_rets	            r   _argmin_combiner~      sZ    26F?	&	CB

2vv.I

2vv.Iir   c                     t        | |||d      S rP   r~   rR   s       r   _argmin_combine_tie_break_leftr      rT   r   c                     t        | |||d      S rB   r   rR   s       r   _argmin_combine_tie_break_fastr      rW   r   c                .    t        j                  | |      S rY   r,   r[   s     r   _elementwise_minr      r_   r   r-   c                T   t        j                  |       } |r<|rt        j                  | |t        |      S t        j                  | |t        |      S t        j
                  | j                  j                        dk  rt        j
                  | j                  j                               r | j                  t         j                        } n@| j                  j                         sJ d       | j                  t         j                        } t        j                  | |t        |      S re   )r   ri   rj   r   r   r   r<   rk   rl   rm   rn   ro   rp   rq   r   rr   s        r   minr      s    
 --e4E(,,UD:Xdmnn,,UD:Xdmnn>>%++889B>~~ekk5578.{{))+Q-QQ+,{{5$(8INNr   zminimum indexc                ,    t        | |d||      \  }}|S ru   )r   rv   s         r   argminr      s!     TQ_ktuFAsJr   c                    | |z   S rY   r   r[   s     r   _sum_combiner          q5Lr   r<   c                   t        j                  |      }||S d }| j                         r%| j                  dk  rt         j                  }|S d }|S | j                         r!| j                  dk  rt         j                  nd }|S )Nrh   )r   _unwrap_if_constexpris_int_signedint_bitwidthrp   is_int_unsigneduint32)in_dtyper<   	out_dtypes      r   _pick_sum_dtyper     s    %%e,E I"*"7"7""<DJJ	  CG	  
	!	!	##+#8#82#=DKK4	r   r    )	dtype_argc                    t        | j                  |      }|| j                  |      } t        j                  | |t
        |      S )Nrf   )r   r<   rm   r   rq   r   )r?   rs   rg   r<   r   s        r   r    r      s=    
 !0U CI#;;udLIFFr   c                    | |z  S rY   r   r[   s     r   _xor_combiner   !  r   r   zxor sumc                    t        j                  | j                  j                  j	                         d       t        j
                  | |t        |      S )Nz#xor_sum only supported for integersrf   )r   static_asserttypescalarro   rq   r   )r?   rs   rg   s      r   xor_sumr   )  s=     	uzz((//13XY;;udLIFFr   cumsumc                d    t        j                  |       } t        j                  | |t        |      S rY   )r   ri   associative_scanr   r?   rs   reverses      r   r   r   4  s+    
 --e4E  lGDDr   c                    | |z  S rY   r   r[   s     r   _prod_combiner   @  r   r   cumprodc                d    t        j                  |       } t        j                  | |t        |      S rY   )r   ri   r   r   r   s      r   r   r   E  s+    
 --e4E  mWEEr   c                   | j                   |z	  }|d|z  z  dd||z
  dz
  z  g}t        j                  | |      }t        j                  dd      d d d d f   }t        j                  t        |d|z
  z  d      d d d d d f   |      j                  |j                        }t        j                  t        ||z  d      d d d d d f   |      j                  |j                        }	t        j                  || j                        }t        j                  |	| j                        }	t        j                  | j                  j                  d      }
|j                  |
d      }|	j                  |
d      }| j                  |
d      }|t        j                  ||	kD  |k7  ||z  t        |            z  }|j                  | j                  d      S )Nr   r   r   Tbitwidthsignedbitcast)r)   r   r(   arangebroadcast_tor    rm   r<   r;   get_int_dtyperk   rD   r@   )r   flipr   n_dimsn_outerr;   ymaskleftrightidtypeileftirightixrx   s                  r   _compare_and_swapr   Q  s   gg/G$q!t^QFQJN0CDEQA;;q!T1d]+DSa$h3AtQJ?GJJ177SDc!d(A.q$z:EBEEaggNE<<agg&DLL(E)C)CDQFGGFDG)EXXfdX+F	
fd	#B
tzz4%<D0%&.*R.Q
QC66!''46((r   c                   | j                   |z	  }t        j                  ||k         |dk(  re|d|dz
  |z
  z  z  dd|z  g}t        j                  t        j                  t        j
                  dd      ddddf   |      | j                        }n|}t        j                  |      D ]  }t        | ||||z
  z   |      }  | S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   r   N)	r)   r   r   r(   r   r   r;   static_ranger   )r   stageorderr   r   r;   r   r   s           r   _bitonic_merger   e  s      gg/Gu' z!(1vzE/A+B!BAq%x P||D--dkk!Q.?a.NPUVXYX_X_`u% EaqFUN';VDEHr   c                2   |t        | j                        dz
  n|}t        j                  |t        | j                        dz
  k(  d       t	        | j                  |         }t        j
                  d|dz         D ]  }t        | |||k  rdn||      }  | S )a  
    Sorts a tensor along a specified dimension.

    :param x: The input tensor to be sorted.
    :type x: Tensor
    :param dim: The dimension along which to sort the tensor. If None, the tensor is sorted along the last dimension. Currently, only sorting along the last dimension is supported.
    :type dim: int, optional
    :param descending: If set to True, the tensor is sorted in descending order. If set to False, the tensor is sorted in ascending order.
    :type descending: bool, optional
    r   z+only minor dimension is currently supportedr   )lenr;   r   r   r   r   r   )r   dim
descending_dimr   r   s         r   sortr   ~  s     03{3qww<!+Dts177|a//1^_"1774=1Fq&1*- J1aa&jj&IJHr   c                    t        j                  |       } t        j                  |      }| t        |      dz
  } | t        |      dz
  k(  sJ d       t        j                  |       S )Nr   z2Currently only support flipping the last dimension)r   r   r   r   )r   r;   s     r   _get_flip_dimr     s_    

#
#C
(C%%e,E
{%j1n#e*q. V"VV >>#r   c           	     <   t        j                  t        | j                  t	        || j                                        t        j                  t        | j
                               t        | j
                        }t        | j
                        t        | j                  t	        || j                                 z
  }t        j                  | j                  j                  d      }t        j                  | j                  |d      dg|z        }t        j                  ||      }t        j                  dd      dddf   dt        j                  dd      z
  k(  }t        j                  ||      D ]f  }|}t        j                  d|dz         D ]'  }	|	|k7  s	|	|dz   k7  st        j                  ||	      }) t        ||z  |dz   d|j                        }h t        j                  || j                        j                  | j                  d      } | S )	z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along (currently only final dimension supported)
    :type dim: int
    Tr   r   r   r   Nr   )rg   r<   )r   r   r   r;   r   r)   r   r   r<   rk   r(   rm   expand_dimsr   r   r    )
r   r   stepsstartr   r   r   r   flip2r.   s
             r   r   r     s    	'c1770K(LMN'01 "!''NE!!''NU177=agg;V3W-XXE)C)CDQFQTT&$T/!u=AE"AKK1ag&!dkk!Q.?*??Due, A""1eai0 	3AAv!q1u*((2	3 E	1q5D@A 	Q ##AGGT#:AHr   c                    t        j                  | |      }t        |j                        dk(  r|S t        j                  ||j                  dd d|j                  d   z  gz         S )a7  
    Interleaves the values of two tensors along their last dimension. The two tensors must have the same shape.
    Equivalent to `tl.join(a, b).reshape(a.shape[:-1] + [2 * a.shape[-1]])`

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    r   Nr   )r   joinr   r;   r(   )r\   r]   cs      r   
interleaver     sY     			!QA
177|q
 ||Aqwws|q1772;.??@@r   )r   core.constexpr)F)NFTF)TF)r   r   r<   r   )NFN)r<   r   rB   )r   F)r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   rY   )/
__future__r   runtime.jitr    r   r   r   r   _tensor_member_fnr   _add_math_1arg_docstrr   r   r*   r8   r=   r@   rN   rS   rV   r^   _add_reduction_docstrr   ry   r~   r   r   r   r   r   r   r   r    r   r   _add_scan_docstrr   r   r   r   r   CONSTEXPR_0r   r   r   r   r   r   r   <module>r      sg   "   
 9 	   	  I&" '  " I&. '  . ?  ? $ $N 	& 	& + +   A A B B   I:J*IKOK  O" O;KL M       A A B B   I:J*IKOK  O" O;KL M  
   EW5G 6  G   I&G '  G x E !  E   y!F "  F ) )&  0 "&TEUEU   0   < A Ar   