
    nVh2W                      d dl mZ d dlZd dlmZmZmZmZmZ d dl	Z	ddl
mZ ddlmZ  ed      Z G d	 d
e      ZdtdZdtdZdudZ	 	 	 	 dvdZdwdxdZdydZ	 	 dz	 d{dZd|dZ	 	 	 	 d}dZ	 	 	 	 d}dZ	 	 	 	 d}dZd~dZd~dZ	 	 	 	 ddZd~dZ ddZ!ddZ"ddZ#	 	 	 	 ddZ$ddZ%ddZ&dd Z'dd!Z(dd"Z)dd#Z*dd$Z+dd%Z,dd&Z-dd'Z.dd(Z/dd)Z0dd*Z1dd+Z2dd,Z3dd-Z4dd.Z5dd/Z6dd0Z7dd1Z8dd2Z9dd3Z:dd4Z;dd5Z<dd6Z=dd7Z>dd8Z?dd9Z@dd:ZAdd;ZBdd<ZCdd=ZD	 d	 	 	 dd>ZEd? ZFd@ ZGdA ZHdB ZIdC ZJdD ZKdE ZLdF ZMdG ZN	 	 	 	 	 	 	 	 	 	 	 	 ddHZOddIZPdJ ZQ	 	 	 	 ddKZR	 	 	 	 ddLZS	 	 	 	 ddMZTddNZU	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddOZVddPZWdQ ZXdR ZY	 	 	 	 	 	 ddSZZddTZ[	 	 	 	 ddUZ\ddVZ]ddWZ^ddXZ_ddYZ`ddZZadd[Zb	 	 	 	 dd\Zcd] Zd	 	 	 	 	 	 dd^Zedd_Zfdd`Zg	 	 	 	 	 	 	 	 	 	 	 	 ddaZhddbZidc ZjdddZk	 	 	 	 ddeZlddfZmddgZnddhZoddiZpddjZqddkZrddlZsddmZtddnZudo ZvdwdpZwddqZxddrZy	 	 	 	 	 	 	 	 	 	 	 	 ddsZzy)    )annotationsN)ListOptionalSequenceTupleTypeVar   )ir   )coreTc                       e Zd Z fdZ xZS )IncompatibleTypeErrorImplc                    || _         || _        d| j                   j                         z   dz   | j                  j                         z   | _        t        t
        |   | j                         y )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__s      H/home/dcms/DCMS/lib/python3.12/site-packages/triton/language/semantic.pyr   z"IncompatibleTypeErrorImpl.__init__   sX    2T[[5I5I5KKgUX\XcXcXlXlXnn'7E    )__name__
__module____qualname__r   __classcell__)r   s   @r   r   r      s    F Fr   r   c                    | dvrt        d|        t        j                  |j                  |       t        j                        S )Nr   r   r	   z+program_id axis must be 0, 1, or 2 but got )
ValueErrortltensorcreate_get_program_idint32axisbuilders     r   
program_idr*      s=    9FtfMNN99W2248"((CCr   c                    | dvrt        d|        t        j                  |j                  |       t        j                        S )Nr!   z-num_programs axis must be 0, 1, or 2 but got )r"   r#   r$   create_get_num_programsr&   r'   s     r   num_programsr-   !   s=    9HOPP99W44T:BHHEEr   c                `   | j                   }|j                   }| j                  }|j                  }||k(  r	||kD  r| S |S |t        j                  j                  j
                  k(  r	||k\  r| S |S |t        j                  j                  j
                  k(  r	||k\  r|S | S t        d| d|       )Nzunexpected signedness r   )int_bitwidthint_signednessr#   dtype
SIGNEDNESSUNSIGNED	TypeError)a_tyb_tya_rankb_ranka_snb_sns         r   integer_promote_implr;   ,   s    FFDD t|t0D0	$$--	-'t1T1	$$--	-'t1T1
,TF%v>
??r   c                   ||k7  rx|r| |fn|| f\  }}|j                         j                  |j                         j                  k  r6|r2|t        j                  t        j                  fv rt        j
                  S |S | j                         s|j                         rt        j                  S | j                         s|j                         rt        j
                  S | j                         s|j                         r"|rt        j
                  S t        j                  S | j                         r2|j                         r"|rt        j
                  S t        j                  S | j                         s|j                         rt        j
                  S | j                         r'|j                         r| |k(  r| S t        j                  S | j                         r|j                         st        d|  d|       |rL| j                  |j                  k7  r3t        d| j                         z   dz   |j                         z   dz         t!        | |      S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer#   float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intr4   r0   r   r;   )r5   a_is_scalarr6   b_is_scalar
div_or_mod	scalar_ty	tensor_tys          r   computation_type_implrP   <   s   
 k!/:d|t	9>>!!Y^^%5%;%;;yRZZ,EEzz! ||~zz ||~zz ||~::::||~$,,.::;;||~zz{{}t|t33;;=*4&dV<== d))T-@-@@5G'QTXTaTaTcckk l 	l  d++r   c                   t        | t              r3t        j                  |j	                  |       t        j
                        S t        | t              rd| cxk  rdk  rn nt        j                  }nld| cxk  rdk  rn nt        j                  }nMd| cxk  rdk  rn nt        j                  }n.d| cxk  rdk  rn nt        j                  }nt        d|  d      t        d	| ||
      S t        | t              rnd}dddz  z  }t        d   |       }|t        d      k(  s|dk(  s| | k7  s||cxk  r|k  rn nt        j                  }nt        j                   }t        d	| ||
      S t        | t        j"                        rt%        | j&                  |      S t        | t        j                        r| S |rt)        d|  dt+        |        d      | S )N           l                             l            zNonrepresentable integer . r1   r)   g      8g   ?r	      absinf        zcannot convert z	 of type z
 to tensor)
isinstanceboolr#   r$   get_int1int1intr&   uint32int64uint64r"   fullfloat__builtins__rC   rE   	constexpr	to_tensorr@   r4   type)xr)   
check_typer1   min_float32max_float32abs_xs          r   ri   ri   o   s   !Tyy))!,bgg66	As	QHHEa%IIEq 5 HHEa%IIE81=>>B88	Au	!QV+U#A&E%L C<6%.;.JJEJJEB88	Ar||	$'**	Aryy	!/!Id1gYjIJJHr   c                    | j                         rL|st        | |      |j                         r| |k7  rt        | |      |j                         rt        | |      y y N)is_ptrr   is_floating)r   r   allow_ptr_as      r   check_ptr_type_implru      s[    }}+FF;;==?& 0+FF;;+FF;;   r   c                   t        | t        j                        }t        |t        j                        }|r| }	t        | |      } |r|}
t        ||      }| j                  j
                  }|j                  j
                  }t        |||       t        |||       |r.|j                         s|j                         st        |||||      }|r	dk  r|j                         s|r 
dk  r|j                         rt        d      |j                         rx|r:|j                         	cxk  r|j                         k  sn t        d|	 d|       |r:|j                         
cxk  r|j                         k  sn t        d|
 d|       |rt        d	||      nt        | ||      } |rt        d
||      nt        |||      }t!        | ||      \  } }| |fS )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type rW   rX   )r]   numbersNumberri   rj   scalarru   rr   rP   is_int_unsignedr"   rJ   get_int_min_valueget_int_max_valuere   castbroadcast_impl_value)lhsrhsr)   allow_lhs_ptrallow_rhs_ptrarithmetic_checkrM   lhs_is_scalarrhs_is_scalar
lhs_scalar
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tys                 r   binary_op_type_checking_implr      s    sGNN3MsGNN3M
W%
W% JJ
J>
J>
 1 1 3J<M<M<O*:}jR_akl
j1n1K1K1M Z!^
8R8R8T G H Hj&B&B&D
&tV`VrVrVt&t 7:,6PQ[P\!]^^j&B&B&D
&tV`VrVrVt&t 7:,6PQ[P\!]^^BO 
*g?UYZ]_ikrUs 	 CP 
*g?UYZ]_ikrUs 	 $Cg6HC8Or   c                (   | j                   j                  j                  dk\  s|j                  j                  sy | j                   j                  }|j                   j                  }||k(  sJ |j                         sJ t        | t        j                  |      } t        |t        j                  |      } || |d|      }|j                         }t        j                  |j                  |      t        j                        }|j                         }t        j                  |j                  |      t        j                        }t        t        |||      t        |||      |      }	d|j                   d|j                    }
t#        |	|
|       y )N@   Fra   z! overflow detected for operation )rj   ry   r/   optionssanitize_overflowrJ   r}   r#   rc   r|   r$   	get_int64r{   and_
less_equalgreater_equalr   device_assert)r   r   r)   	binary_opr   r   ret	max_value	min_valuecondmsgs              r    binary_op_sanitize_overflow_implr      s>   
xx##r)1R1RJJ###
sBHHg
&C
sBHHg
&C
CeW
-C,,.I		'++I6AI,,.I		'++I6AI
3	73]3	SZ5[]deD
''((I)J\J\I]
^C$W%r   c                `   t        | ||dd      \  } }| j                  j                  }|j                  j                  }|j                         r|j                         rt	        d      |j                         r@|j                         s0|| }} | j                  j                  }|j                  j                  }|j                         r|j
                  }|j                  j                         r|j                  j                  dk  r|j                  j                         rLt        j                  t        j                  |j                  j                               j                  |      }nt        j                  j                  |      }|j                  |j
                  |d      }t        j                   |j#                  | j
                  |      | j                        S |j%                         rDt        j                   |j'                  | j
                  |j
                        | j                        S |j)                         rX|rt+        | ||t,               t        j                   |j/                  | j
                  |j
                        | j                        S t	        d|       )NTzcannot add pointers togetherr   Fr=   )r   rj   ry   rr   r4   handler1   rz   r/   is_blockr#   
block_typerc   get_block_shapesto_ircreate_int_castr$   create_addptrrs   create_faddrJ   r   add
create_add)inputotherr   r)   input_scalar_tyother_scalar_tyother_handlei64_tys           r   r   r      s   /ugtTRLE5jj''Ojj''OO$:$:$<677 (>(>(@eu**++**++||;;&&(U[[-E-E-Jzz""$rxx1L1L1NOUUV]^0"225<<OLyy..u||\JEJJWW		$	$	&yy,,U\\5<<H%**UU				!,UE7CHyy++ELL%,,GTT
&&78
99r   c           	        t        | ||dd      \  } }| j                  j                  }|j                         rNt	        j
                  |j                  | j                  t        ||      j                        | j                        S |j                         rDt	        j
                  |j                  | j                  |j                        | j                        S |j                         rX|rt        | ||t               t	        j
                  |j                  | j                  |j                        | j                        S t        d|       )NTFr=   )r   rj   ry   rr   r#   r$   r   r   minusrs   create_fsubrJ   r   sub
create_subr4   r   r   r   r)   rN   s        r   r   r     s    /ugtUSLE5

!!Iyy..u||U5'=R=Y=YZ\a\f\fggyy,,U\\5<<H%**UU				,UE7CHyy++ELL%,,GTT
&yk2
33r   c                   t        | ||      \  } }| j                  j                  }|j                         rDt	        j
                  |j                  | j                  |j                        | j                        S |j                         rX|rt        | ||t               t	        j
                  |j                  | j                  |j                        | j                        S t        d|       Nr=   )r   rj   ry   rs   r#   r$   create_fmulr   rJ   r   mul
create_mulr4   r   s        r   r   r     s    /ugFLE5

!!Iyy,,U\\5<<H%**UU				,UE7CHyy++ELL%,,GTT
&yk2
33r   c           	     6   t        | ||dddd      \  } }| j                  j                  }|j                  j                  }|j                         r|j	                         rt        |||      }n|j	                         r|j                         rt        | ||      } n|j	                         rG|j	                         r7t        | t        j                  |      } t        |t        j                  |      }nc|j                         rE|j                         r5|j                  |j                  kD  rt        |||      }nt        | ||      } nt        d|       t        j                  |j                  | j                  |j                        | j                        S NFTr=   )r   rj   ry   rs   rJ   r}   r#   rC   fp_mantissa_widthr4   r$   create_fdivr   )r   r   r)   r   r   s        r   truedivr   #  sH   /ugueUY[_`LE5jj''Ojj''O""$)?)?)AUOW5				!o&A&A&CUOW5				!o&<&<&>UBJJ0UBJJ0		$	$	&?+F+F+H,,/P/PP9E9E *?*;<==99W((u||DejjQQr   c           	     Z   t        | ||dddd      \  } }| j                  j                  }|j                  j                  }|j                         r|j                         rt	        ||      }t        | ||      } t        |||      }|j                         rDt        j                  |j                  | j                  |j                        | j                        S t        j                  |j                  | j                  |j                        | j                        S t        d|       r   )r   rj   ry   rJ   r;   r}   is_int_signedr#   r$   create_sdivr   create_udivr4   )r   r   r)   r   r   ret_tys         r   floordivr   =  s    /ugueUY[_`LE5jj''Ojj''OO$:$:$<%oGUFG,UFG,!99W00u||LejjYY99W00u||LejjYY
&&78
99r   c           	     d   | j                   j                  }|j                   j                  }|j                         r|j                         st        d      t	        | ||dddd      \  } }|j                  | j                  |j                        }t        j                  || j                         S )Nz4both operands of fdiv must have floating scalar typeFT)	rj   ry   rs   r4   r   r   r   r#   r$   )r   r   ieee_roundingr)   r   r   r   s          r   fdivr   L  s    jj''Ojj''O&&(0K0K0MNOO/ugueUZ\`aLE5


ellELL
9C99S%**%%r   c           	     .   t        | ||dddd      \  } }| j                  j                  }|j                  j                  }|j                         rDt	        j
                  |j                  | j                  |j                        | j                        S |j                         r|j                  |j                  k7  r3t        d|j                         z   dz   |j                         z   dz         |j                         rDt	        j
                  |j                  | j                  |j                        | j                        S t	        j
                  |j                  | j                  |j                        | j                        S t        d|       )NFTzCannot mod z by r>   r=   )r   rj   ry   rs   r#   r$   create_fremr   rJ   r0   r4   r   r   create_sremcreate_urem)r   r   r)   rN   r   s        r   modr   W  sF   /ugueUY[_`LE5

!!Ijj''Oyy,,U\\5<<H%**UU				##'E'EEMI,>,>,@@6IOLdLdLff jo o p p ""$99W00u||LejjYY99W00u||LejjYY
&yk2
33r   c                f   t        | ||      \  } }| j                  }|j                         r|t        j                  j
                  k(  rDt        j                  |j                  | j                  |j                        | j                        S |t        j                  j                  k(  rDt        j                  |j                  | j                  |j                        | j                        S t        d|       |j                         rDt        j                  |j                  | j                  |j                        | j                        S |j                         rDt        j                  |j!                  | j                  |j                        | j                        S t#        d|       NzUnexpected propagate_nan Unexpected dtype )r   r1   rs   r#   PropagateNanALLr$   create_minimumfr   rj   NONEcreate_minnumfr"   r   create_minsirz   create_minuir4   rk   ypropagate_nanr)   r1   s        r   minimumr   p  3   '1g6DAqGGEBOO///99W44QXXqxxH!&&QQboo22299W33AHHahhGPP8HII				yy--ahhA166JJ				 yy--ahhA166JJ+E7344r   c                f   t        | ||      \  } }| j                  }|j                         r|t        j                  j
                  k(  rDt        j                  |j                  | j                  |j                        | j                        S |t        j                  j                  k(  rDt        j                  |j                  | j                  |j                        | j                        S t        d|       |j                         rDt        j                  |j                  | j                  |j                        | j                        S |j                         rDt        j                  |j!                  | j                  |j                        | j                        S t#        d|       r   )r   r1   rs   r#   r   r   r$   create_maximumfr   rj   r   create_maxnumfr"   r   create_maxsirz   create_maxuir4   r   s        r   maximumr     r   r   c                X   t        |||      \  }}t        | ||      \  } }t        | ||      \  } }| j                  }|j                         rPt        j                  |j                  | j                  |j                  |j                  |      | j                        S t        d| d      )Nr   z(. Only floating point clamp is supported)	r   r1   rs   r#   r$   create_clampfr   rj   r4   )rk   minmaxr   r)   r1   s         r   clampr     s    +Cg>HC)!S':FAs)!S':FAsGGEyy..qxxSZZQ^_abagaghh+E72Z[\\r   c                :   t        | ||      \  } }| j                  j                  }|j                  j                  }|j                         r|j                         st	        ||      t        ||      }||k7  rt        | ||      } ||k7  rt        |||      }| |fS rq   )r   rj   ry   rJ   r   r;   r}   )r   r   r)   input_sca_tyother_sca_tyr   s         r   bitwise_op_type_checking_implr     s    /ugFLE5::$$L::$$L (;(;(='lCC%lLAJ\!UJ0\!UJ0%<r   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rq   )r   r#   r$   
create_andr   rj   r   r   r)   s      r   r   r     >    0wGLE599W''ellCUZZPPr   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rq   )r   r#   r$   	create_orr   rj   r   s      r   or_r     s>    0wGLE599W&&u||U\\BEJJOOr   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rq   )r   r#   r$   
create_xorr   rj   r   s      r   xor_r     r   r   c                   | j                   j                         s t        | t        j                  d      |      } |j                   j                         s t        |t        j                  d      |      }t        | ||      S Nr`   )rj   is_int1bitcastr#   r1   r   r   s      r   logical_andr     s_    ::rxx/9::rxx/9ug&&r   c                   | j                   j                         s t        | t        j                  d      |      } |j                   j                         s t        |t        j                  d      |      }t        | ||      S r   )rj   r   r   r#   r1   r   r   s      r   
logical_orr     s_    ::rxx/9::rxx/9ueW%%r   c                    | j                   j                         s t        | t        j                  d      |      } t        | |      S r   )rj   r   r   r#   r1   invert)r   r)   s     r   not_r     s6    ::rxx/9%!!r   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rq   )r   r#   r$   create_lshrr   rj   r   s      r   lshrr     >    0wGLE599W((u||DejjQQr   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rq   )r   r#   r$   create_ashrr   rj   r   s      r   ashrr    r   r   c                    t        | ||      \  } }t        j                  |j                  | j                  |j                        | j
                        S rq   )r   r#   r$   
create_shlr   rj   r   s      r   shlr    r   r   c                    | S rq   rW   )r   s    r   plusr    s    Lr   c                   | j                   j                  }|j                         rt        d|j	                         z   dz         t        j                  |j                  |j                  |            |      }t        || d|      S )Nz$wrong type argument to unary minus ()T)
rj   ry   rr   r"   r   r#   r$   get_null_valuer   r   )r   r)   r   _0s       r   r   r     st    ::$$L?,BWBWBYY\__``	7)),*<*<W*EF	UBr5$((r   c                .   | j                   j                  }|j                         s|j                         rt	        d|j                         z   dz         t        j                  |j                  |j                  |            |      }t        | ||      S )Nz%wrong type argument to unary invert (r
  )rj   ry   rr   rs   r"   r   r#   r$   get_all_ones_valuer   r   )r   r)   r   _1s       r   r   r     s}    ::$$L 8 8 :@<CXCXCZZ]``aa	7--l.@.@.IJL	YBr7##r   c                    | j                   j                         st        j                  S | j                   j                  }t        j
                  t        j                  |      S rq   )rj   r   r#   r`   shaper   )vr  s     r   
_bool_liker    s;    66??wwFFLLE==%((r   c                \   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         r|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t	        j
                  |j                  | j                  |j                        t        |             S t        d|       r   )r   rj   ry   rs   r#   r$   create_fcmpOGTr   r  rJ   r   create_icmpSGTcreate_icmpUGTr4   r   r   r)   rN   s       r   greater_thanr        /ugFLE5

!!Iyy//ellKZX]M^__				""$99W33ELL%,,OQ[\aQbcc99W33ELL%,,OQ[\aQbcc
&yk2
33r   c                \   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         r|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t	        j
                  |j                  | j                  |j                        t        |             S t        d|       r   )r   rj   ry   rs   r#   r$   create_fcmpOGEr   r  rJ   r   create_icmpSGEcreate_icmpUGEr4   r  s       r   r   r     r  r   c                \   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         r|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t	        j
                  |j                  | j                  |j                        t        |             S t        d|       r   )r   rj   ry   rs   r#   r$   create_fcmpOLTr   r  rJ   r   create_icmpSLTcreate_icmpULTr4   r  s       r   	less_thanr#  )  r  r   c                \   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         r|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t	        j
                  |j                  | j                  |j                        t        |             S t        d|       r   )r   rj   ry   rs   r#   r$   create_fcmpOLEr   r  rJ   r   create_icmpSLEcreate_icmpULEr4   r  s       r   r   r   8  r  r   c                   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t        d|       r   )r   rj   ry   rs   r#   r$   create_fcmpOEQr   r  rJ   create_icmpEQr4   r  s       r   equalr+  G      /ugFLE5

!!Iyy//ellKZX]M^__				yy..u||U\\JJW\L]^^
&yk2
33r   c                   t        | ||      \  } }| j                  j                  }|j                         rCt	        j
                  |j                  | j                  |j                        t        |             S |j                         rCt	        j
                  |j                  | j                  |j                        t        |             S t        d|       r   )r   rj   ry   rs   r#   r$   create_fcmpUNEr   r  rJ   create_icmpNEr4   r  s       r   	not_equalr0  S  r,  r   c                   t        | t              rt        |t              st        d      t        | dz	        }t        |dz	        }|s|rt        d      || k  rt        d      || z
  }||dz
  z  dk7  rt        d      |g}t	        j
                  t        j                  |      }t	        j                  |j                  | |      |      S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)	r]   ra   r"   r^   r#   r   r&   r$   create_make_range)startendr)   is_start_int64is_end_int64ranger  r   s           r   aranger9  d  s    eS!C)=JKK%2+&Nr	?L344
e|XYY%KE!>??GE]]288U+F99W..uc:FCCr   c                   t        |t        j                        r.|j                  j                  dk(  sJ d       t        |||      }nj|t        d      |dk(  r!|j                  |j                  |            }n!t        |d|j                         } ||      }t        j                  ||      }t        || |      S )Nr   zonly accepts size-1 tensorz2dtype must be specified when value is not a tensorr   get_)r]   r#   r$   numelr@   r}   r"   r  r   getattrnamesplat)r  r@   r1   r)   get_value_fns        r   re   re   u  s    %#{{  A%C'CC%UE7+ =QRRA:**5;;w+?@E"7d5::,,?@L 'E		%'w''r   c                   | j                   j                         rJ d       t        |      dk(  r| S t        j                  | j
                  |      }t        j                  |j                  | j                  |      |      S )NzCannot splat a block tensorr   )	rj   r   lenr#   r   r1   r$   create_splatr   )r@   r  r)   r   s       r   r?  r?    sd    zz""$C&CC$
5zQ]]5;;.F99W))%,,>GGr   c                   d}|D ]  }||z  }	 | j                   j                  |k7  rt        d      t        j                  | j                   j
                  |      }t        j                  |j                  | j                  ||      |      S )Nr   z:reshape() cannot change total number of elements in tensor)	rj   r<  r"   r#   r   ry   r$   create_reshaper   )r   	dst_shapecan_reorderr)   r<  sr   s          r   reshaperI    s|    E 
zz5 UVV]]5::,,i8F99W++ELL)[QSYZZr   c                   | j                   D cg c]  }t        j                  |       }}|j                  |d       | j                  j                         st        | ||      S t        j                  | j                  j                  |      }t        j                  |j                  | j                  |      |      S c c}w )Nr   )r  r)   )r  r#   _constexpr_to_valueinsertrj   r   r?  r   ry   r$   create_expand_dimsr   )r   r(   r)   rk   rF  r   s         r   expand_dimsrN    s    49KK@q''*@I@T1:: U)W==]]5::,,i8F99W//dCVLL As   Cc                L   |sJ d       t        | j                        dk(  sJ t        j                  | j                  j
                  | j                  d   |j                  d   z   g      }t        j                  |j                  | j                  |j                        |      S )Nz;current implementation of `cat` always may reorder elementsr   r   )	rB  r  r#   r   rj   ry   r$   
create_catr   )r   r   rG  r)   ret_types        r   catrR    s|    UUU;syy>Q}}SXX__syy|ciil/J.KLH99W''

CJJ?JJr   c                   t        | ||      \  } }| j                  g k(  }|rt        | d|      } t        |d|      }t        | j                  d   t        j
                        rt	        j
                  d      }nd}| j                  |gz   }t	        j                  | j                  j                  |      }t	        j                  |j                  | j                  |j                        |      }|rt        |dgd|      }|S )Nr   r	   FrG  r)   )r~   r  rN  r]   r#   rh   r   rj   ry   r$   create_joinr   rI  )abr)   
was_rank_1two	new_shaperQ  r   s           r   joinr\    s    1g.DAq BJ1g&1g&!''"+r||,ll1o3%I}}QVV]]I6H
))G''!((;X
FCcA3E7CJr   c                   t        | j                        dkD  sJ t        j                  | j                  d         dk(  sJ | j                  d d }t        j                  | j
                  j                  |      }|j                  | j                        \  }}t        j                  ||      t        j                  ||      fS )Nr   rT  r	   )
rB  r  r#   rK  r   rj   ry   create_splitr   r$   )rW  r)   r[  rQ  outLHSoutRHSs         r   splitra    s    L1""1772;/1454I}}QVV]]I6H))!((3NFF
		&(#
		&(# r   c                   t        | j                        t        |      k7  rt        d      t        d |D              t	        t        t        |                  k7  rt        d|       t        j                  | j                  j                  |D cg c]  }| j                  |    c}      }t        j                  |j                  | j                  |      |      S c c}w )Nz5permute dims must have the same length as input shapec              3  F   K   | ]  }t        j                  |        y wrq   )r#   rK  ).0ds     r   	<genexpr>zpermute.<locals>.<genexpr>  s     6Ab$$Q'6s   !z?permute dims must be a permutation of 0, 1, ..., n-1, but were )rB  r  r"   sortedlistr8  r#   r   rj   ry   r$   create_transr   )r   dimsr)   re  rQ  s        r   permuterk    s    
5;;3t9$PQQ666$uSY?O:PPZ[_Z`abb}}UZZ..0NAQ0NOH99W))%,,=xHH 1Os   C 
c                   | j                   j                         sPt        j                  | j                   |      }t        j                  |j                  | j                  |      |      S | j                   j                         }t        |      t        |      k7  rt        d| d|       ||k(  r| S t        |      D ]0  \  }}||   |k7  s|dk7  st        d||    d| d| d| d| 
       t        j                  | j                   j                  |      }t        j                  |j                  | j                  |      |      S )Nz!Cannot broadcast, rank mismatch: z, r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rj   r   r#   r   r$   rC  r   r   rB  r"   	enumeratery   create_broadcast)r   r  r)   r   	src_shapeiitems          r   broadcast_impl_shapers    s;   :: uzz51yy--ellEBFKK

++-I
9~U#<YKr%QRR	Y' <48t	RSXYZS[R\ ]??Cf E!!"2i[5'; < <<
 ]]5::,,e4F99W--ellEBFKKr   c           	     @   | j                   }|j                   }|j                         r||j                         slt        j                  |j                  |j
                        }t        j                  |j                  |j                  |j                               |      }| |fS |j                         s||j                         rlt        j                  |j                  |j
                        }t        j                  |j                  | j                  |j                               |      } | |fS |j                         r|j                         r|j                         }|j                         }t        |      t        |      k  rt        t        |      t        |            D ]z  }t        j                  |j                  | j                  d      t        j                  |j                  dg|j                  z               } | j                   }|j                         }| nt        |      t        |      k  rt        t        |      t        |            D ]z  }t        j                  |j                  |j                  d      t        j                  |j                  dg|j                  z               }|j                   }|j                         }| t        |      t        |      k(  sJ g }t        |      D ]q  \  }	}
||	   }|
dk(  r|j                  |       "|dk(  s||
k(  r|j                  |
       >t!        dt#        |	      z   dz   t#        |
      z   dz   t#        |      z          ||k7  rPt        j                  |j                  |      }t        j                  |j%                  | j                  |      |      } ||k7  rPt        j                  |j                  |      }t        j                  |j%                  |j                  |      |      }| |fS )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index rm  r   )rj   r   r#   r   ry   r  r$   rC  r   r   rB  r8  rM  valuesrn  appendr"   strro  )r   r   r)   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaperq  leftrightr   s                r   r~   r~     s_   XXFXXF !2v}}fll;ii,,SZZ9P9P9RSU[\V 8OS __6??#4v}}fll;ii,,SZZ9P9P9RSU[\N 8OK 
	v0++-	++-	y>C	N*3y>3y>: 6ii : :3::q I "fmmaS9CSCS=S TV"335		6
 ^c)n,3y>3y>: 6ii : :3::q I "fmmaS9CSCS=S TV"335		6
 9~Y///	 + 	aGAtaLEqy  '1*%4-  &  "-/21v"68<"=?B4y"IKR"SUXY^U_"` a a	a 	!]]6==)<F))G44SZZKVTC	!]]6==)<F))G44SZZKVTC8Or   c                    | y | dk(  rt         j                  j                  S | dk(  rt         j                  j                  S t	        d|  d      )NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r
   ROUNDING_MODERTNERTZr"   )rounding_modes    r   _str_to_rounding_moder  ,  sU    $$$###
.}o=mn
oor   c                F   | j                   }|j                         r8t        j                  |j                  | j                   j                               }||k(  r| S |j                  }|j                  }|j                         s|j                         rt        | ||      S |j                  }|j                  }||k7  r&t        dt        |      z   dz   t        |      z         t        j                  |j                  | j                  |j                  |            |      S )Nz!Cannot bitcast data-type of size z to data-type of size )rj   r   r#   r   ry   r   rr   r}   primitive_bitwidthr"   rw  r$   create_bitcastr   r   )r   dst_tyr)   src_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitss           r   r   r   6  s    ZZFv}}ejj.I.I.KLJJj//1E67++,,H,,H8<s8}L P. .03H> ? 	?99W++ELL&,,w:OPRXYYr   c                   | j                   }|j                         r8t        j                  |j                  | j                   j                               }||k(  r| S |j                  }|j                  }t        |      }d}|j                         rf|j                         rV|j                  |j                  k  r=|t        j                  j                  }nH|t        j                  j                  k7  r+d}n(|&t        dt        |      z   dz   t        |      z         |j                         s|j                         r<|j                  j!                  d      	 J d        |j                  d   | |||      S |j#                         r|j                         s"|j                         r|j#                         s|r@t        j$                  |j'                  | j(                  |j+                  |      |      |      S |j-                         r|j/                         r |j1                         r6|j/                         s&t3        t3        | t        j4                  |      ||      S |j                         xr+ |j                         xr |j                  |j                  kD  }|r?t        j$                  |j7                  | j(                  |j+                  |            |      S |j                         xr+ |j                         xr |j                  |j                  k  }	|	r?t        j$                  |j9                  | j(                  |j+                  |            |      S |j;                         r|j;                         r|j<                  |j<                  k7  s|j>                  |j>                  k7  r|jA                         xr |jC                          }
|jC                         rW| jD                  j+                  |      }t        j$                  |jG                  |      | jD                        }tI        | ||      S t        j$                  |jK                  | j(                  |j+                  |      |
      |      S |jM                         r|j;                         r|jC                         rW| jD                  j+                  |      }t        j$                  |jG                  |      | jD                        }tI        | ||      S |jA                         r?t        j$                  |jO                  | j(                  |j+                  |            |      S t        j$                  |jQ                  | j(                  |j+                  |            |      S |j;                         r|jM                         r|jC                         s|jA                         s?t        j$                  |jS                  | j(                  |j+                  |            |      S t        j$                  |jU                  | j(                  |j+                  |            |      S |jW                         r|j;                         r|j<                  }|dk(  r?t        j$                  |jY                  | j(                  |j+                  |            |      S |d	k(  rWtI        t3        | t        jZ                  |      t        j$                  |j]                  d
      t        jZ                        |      S |j;                         rO|jW                         r?t        j$                  |j_                  | j(                  |j+                  |            |      S |jW                         rO|jW                         r?t        j$                  |ja                  | j(                  |j+                  |            |      S J d|  d|        )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type._builderr   r   r   zcannot cast z to )1rj   r   r#   r   ry   r   r  rs   r  r
   r  r  r"   rw  is_fp8e4b15codegen_fnsgetrI   r$   create_fp_to_fpr   r   rG   rF   rH   r}   rC   create_fp_trunccreate_fp_extrJ   r/   r0   r   is_boolr1   r  r0  r   is_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fprr   create_ptr_to_intrc   r   create_int_to_ptrr  )r   r  r)   fp_downcast_roundingr  r  r  use_custom_roundingtruncate_fpext_fpsign_extendtyr  bitwidths                 r   r}   r}   I  sq   ZZFv}}ejj.I.I.KLJJ 11EFJ$:$: %

'
'**G*G
G'@P@P@U@U)=!R%5%5%:%::RV<O+ 68;JHJefhklvhwx y y 	 J$:$:$<""&&"$+/0 	d1c	d 0:w""#9:5&J^ipqq 	
 6 6 8 Z%6%6%8yy00v||G?TVjkmstt 	Z%7%7%9Z%7%7%9D

G4j'JJ
 ((* F F%%
(E(EE  yy00v||G?TUW]^^ ##% F F%%
(E(EE  yy..u||V\\'=RSU[\\ z002:#:#::j>W>W[e[t[t>t ..0M9K9K9M5M""7+B711"5u{{CBUB0099W44U\\6<<PWCXZefhnoo &&(Z->->-@""7+B711"5u{{CBUB00%%'99W44U\\6<<PWCXY[abb99W44U\\6<<PWCXY[abb z>>@z'?'?'A99W44U\\6<<PWCXY[abb99W44U\\6<<PWCXY[abb z002**r>99W66u||V\\RYEZ[]cddq=T%7;RYYwGXGXYZG[]_]e]e=fhopp z002yy225<<gAVWY_`` z002yy//fll7>STV\]]4LtF8445r   c                "   t         j                  j                  }| rr| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S | dk(  rt         j                  j
                  }|S t        d|  d      |S )Nz.ca.cgz.cvCache modifier  not supported)r
   CACHE_MODIFIERr   CACGCVr"   cache_modifiercaches     r   _str_to_load_cache_modifierr    s    ""EU"%%((E L u$%%((E
 L	 u$%%((E L ~.>nMNNLr   c                d   t         j                  j                  }| r| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S | dk(  rt         j                  j
                  }|S | dk(  rt         j                  j                  }|S t        d|  d      |S )Nz.wbr  z.csz.wtr  r  )r
   r  r   WBr  CSWTr"   r  s     r   _str_to_store_cache_modifierr    s    ""EU"%%((E L u$%%((E L u$%%((E
 L	 u$%%((E L ~.>nMNNLr   c                    t         j                  j                  }| rQ| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S t        d|  d      |S )N
evict_lastevict_firstzEviction policy r  )r
   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr"   )eviction_policyevictions     r   _str_to_eviction_policyr    su    !!((Hl*))44H
 O	 -))55H O //@OPPOr   c                    d }| rQ| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S t	        d|  d      |S )NzeronanzPadding option r  )r
   PADDING_OPTIONPAD_ZEROPAD_NANr"   )padding_optionpaddings     r   _str_to_padding_optionr    sh    GV#''00G
 N	 u$''//G N ~.>nMNNNr   c                d   t         j                  j                  }| r| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S | dk(  rt         j                  j
                  }|S t        d|  d      |S )Nacquirereleaseacq_relrelaxedMemory semantic r  )r
   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr"   )
sem_optionsems     r   _str_to_semr    s    
//
)
)C"//))C J 9$//))C J 9$//11C
 J	 9$//))C J /
|>JKKJr   c                "   t         j                  j                  }| rr| dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S | dk(  rt         j                  j                  }|S t        d|  d      |S )Ngpuctasysr  r  )r
   MEM_SYNC_SCOPEGPUCTASYSTEMr"   )scope_optionscopes     r   _str_to_scoper    s    !!E5 %%))E L U"%%))E
 L	 U"%%,,E L /~^LMMLr   c                ~   | rt        | d      s| g} | D cg c]*  }t        |t        j                        r|j                  n|, } }| D ]+  }t        |t
              rd|cxk  rt        |      k  r(J  J  t        |       dkD  sJ t        |       t        t        |             k(  sJ d       t        |       S yc c}w )N__iter__r   z'Duplicate dimension in `boundary_check`rW   )	hasattrr]   r#   rh   r@   ra   rB  setrg  )boundary_checkblock_shapeelemdims       r   _canonicalize_boundary_checkr    s    ~z2,-N]klUY
4(F$**DPll! 	HCc3'A,Gs;7G,GGG,GGG	H>"Q&&&>"c#n*=&>>i@ii>n%% ms   /B:c	           
        ||t        d      | j                  j                  j                  }	|	t        j                  k7  sJ d       |	j                         r(|t        j                  j                  k(  rt        d      | j                  j                  }
t        ||
j                               }t        j                  |j                  | j                  |||||      |
      S )NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r"   rj   
element_tyr#   r`   rJ   r
   r  r  r  r   r$   create_tensor_pointer_loadr   )ptrmaskr   r  r  r  r  is_volatiler)   elt_tyr  s              r   _load_block_pointerr    s     5,fggXX  ++FRWWTTT}}7b&7&7&?&??[\\ XX  F 2.&BYBYB[\N 99**3::~wPUW_almouw wr   c	           
     T   | j                   j                  j                         s't        d| j                   j	                          d      ||t        d      |s|rt        d      | j                   j                         sN|r%|j                   j                         rt        d      |r%|j                   j                         rt        d      | j                   j                         rN|%t        || j                   j                         |      }|%t        || j                   j                         |      }| j                   j                  }	|	j                  }
|
t        j                  k(  }|r=t        j                  }
t        j                  |
|	j                        }	t        | |	|      } |t        ||
|      }| j                   j                         r1| j                   j                         }t        j                  |
|      }n|
}|3t        j                   |j#                  | j$                  |||      |      }nLt        j                   |j'                  | j$                  |j$                  |r|j$                  nd |||      |      }|rt        |t        j                  |      }|S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rj   ry   rr   r"   r   r   rs  r   r  r#   r`   int8pointer_typeaddress_spacer}   r   r$   create_loadr   create_masked_load)r  r  r   r  r  r  r  r  r)   ptr_tyr  r  r  r  r   s                  r   _load_legacyr  0  sA   88??!!#01B1B1D0E]STT |)DEE. T U 	U
 88DII&&(deeUZZ((*eff xx'chh.G.G.I7SD(0I0I0KWUE XX__FF G)=)=>3( UFG, xx))+vu-  |ii++CJJxUW]^ii&&szz4;;PU[_afhp'245;= 3)Jr   c	                   t        |      }	t        |      }
t        |      }| j                  j	                         r7| j                  j
                  j                         rt        | |||||	|
||	      S t        | |||||	|
||	      S rq   )	r  r  r  rj   rr   r  r   r  r  )r  r  r   r  r  r  r  r  r)   r  r  r  s               r   loadr  n  s     (7E&7H$^4G
xxSXX0099;"3e^WeU]_jlstt CunguhXcelmmr   c                    |j                  | j                  |j                  |            }t        j                  ||      S rq   )$create_reinterpret_tensor_descriptorr   r   r#   $_experimental_tensor_descriptor_base)desc_ptrblock_tyr)   r   s       r   reinterpret_tensor_descriptorr  ~  s5    99(//8>>ZaKbcF2268DDr   c                    t        |       dk7  ry | d   dk\  sJ d| d           d|j                  z  dz  }| d   |k\  sJ | d| d| d           y )	Nr	   r      zAtensor descriptor block shape must have at least 8 rows, but got r2  r   z2 tensor descriptor block shape must have at least  columns, but got )rB  r  )r  r1   min_colss      r   validate_descriptor_blockr    s    
5zQ 8q=h]^cde^f]ghh=U---1H	 { '!ST\S]]opuvwpxoyz{ r   c                   t        | t        j                        sJ t        | j                  | j
                         t        | j                        }t        |      |k(  sJ d| dt        |              t        ||d      }|j                  | j                  |t        |      t        |            }t        j                  || j                        S Nz	expected z offsets, but got Frequire_i64)r]   r#   r  r  r  r1   rB  _convert_to_ir_valuescreate_descriptor_loadr   r  r  r$   r   )descoffsetsr  r  r)   ndimrk   s          r   descriptor_loadr    s    dBCCDDDd..

;t Dw<4S9TF2DS\N!SS#GW%HG&&t{{G=XYg=h'>'O	QA99Q((r   c                   t        | t        j                        sJ t        | j                  | j
                         t        | j                        }t        |      |k(  sJ d| dt        |              |j                  | j                  k(  sJ t        ||d      }t        j                  |j                  | j                  |j                  |      t        j                        S r  )r]   r#   r  r  r  r1   rB  r  r  r$   create_descriptor_storer   void)r  r@   r  r)   r  s        r   descriptor_storer    s    dBCCDDDd..

;t Dw<4S9TF2DS\N!SS;;$*****#GW%HG99W44T[[%,,PWXZ\ZaZabbr   c                   t        | t        j                        sJ |dk(  sJ d       |dk(  sJ d       t        | j                        dk(  sJ d| j                          | j                  d   dk(  sJ d| j                          t        |j
                        dk(  sJ d	|j
                          |j
                  d   d
k\  sJ d|j
                          | j                  }d|j                  z  d
z  }| j                  d   |k\  sJ d| d| d| j                  d           t        j                  | j                  |j
                  d   | j                  d   g      }t        ||fd      d   }|j                  | j                  |j                  ||j                  |            }	t        j                  |	|      S )N z#cache modifier is not supported yetz$eviction policy is not supported yetr	   descriptor must be 2D, but got r   r   *descriptor block must have 1 row, but got x offsets must be 1D, but got r  z5descriptor gather must have at least 8 rows, but got r2  zdescriptor gather of  must have at least r  Fr  )r]   r#   r  rB  r  r  r1   r  r   r  create_descriptor_gatherr   r   r$   )
r  	x_offsetsy_offsetr  r  r)   r1   r  rj   rk   s
             r   descriptor_gatherr(    s   dBCCDDDRF!FFb H"HH  t A%[)HIYIYHZ'[[%A!#d'QRVRbRbQc%dd# y1$X(FyFW&XX$ ??1"m&[\e\k\k[l$mm"JJEU---1H	 }/w6J8*Tfgkgwgwxygzf{|}  ==iooa&8$:J:J1:M%NOD$WxlNqQH((i6F6FRVR\R\]dRefA99Qr   c                   t        | t        j                        sJ t        | j                        dk(  sJ d| j                          | j                  d   dk(  sJ d| j                          t        |j
                        dk(  sJ d|j                          |j
                  d   dk\  sJ d|j
                          | j                  }d	|j                  z  dz  }| j                  d   |k\  sJ d
| d| d| j                  d           t        ||fd      d   }|j                  | j                  |j                  |j                  |       t        j                  d t        j                        S )Nr	   r!  r   r   r"  r#  r  z6descriptor scatter must have at least 8 rows, but got r2  zdescriptor scatter of r$  r  Fr  )r]   r#   r  rB  r  r  shapaer1   r  r  create_descriptor_scatterr   r$   r  )r  r@   r&  r'  r)   r1   r  s          r   descriptor_scatterr,    s   dBCCDDD t A%[)HIYIYHZ'[[%A!#d'QRVRbRbQc%dd# y1$Y(FyGWGWFX&YY$ ??1"n&\]f]l]l\m$nn"JJEU---1H	 ~07KH:Ughlhxhxyzh{g|}~  %WxlNqQH%%dkk5<<AQAQS[\99T277##r   c                   |r"|d   j                   t        j                  k(  sJ t        j                  |
j	                  | j
                  |j
                  |D cg c]  }|j
                   c}|D cg c]  }|j
                   c}|D cg c]  }|j
                   c}|D cg c]  }|j
                   c}||||	
      t        j                        S c c}w c c}w c c}w c c}w )Nr   )r1   r#   rc   r$   create_tensormap_creater   r  )r	  global_addressbox_dim
global_dimglobal_strideelement_stride	elem_typeinterleave_layoutswizzle_mode	fill_moder)   rk   s               r   tensormap_creater8    s     a 0 6 6"(( BBB99''OO!!&'!QXX')*!QXX*,-!QXX--.!QXX.	
 	  (*-.s   C2CC$C$c                |    t        j                  |j                  | j                        t         j                        S rq   )r#   r$   #create_tensormap_fenceproxy_acquirer   r  )r	  r)   s     r   tensormap_fenceproxy_acquirer;    s)    99W@@QSUSZSZ[[r   c           	        |t        d      | j                  j                  j                         }|j                  j	                         st        |||      }|j                  j	                         sJ d       ||j                  j                         k(  s&J d| d|j                  j                          d       | j                  j                  j                  |j                  j                  k(  s@J d| j                  j                  j                   d|j                  j                   d       | j                  j                  j                  }|t        j                  k7  sJ d       t        ||      }t        |||      }t        j                  |j                  | j                  |j                  |||      t        j                        S )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r"   rj   r  r   r   rs  r#   r`   r  r}   r$   create_tensor_pointer_storer   r  )	r  valr  r  r  r  r)   r  r  s	            r   _store_block_pointerr?    s    fgg ((%%668K88"3W=88O OO#((33   ]	k]"4SXX5N5N5P4QQ[\] 88))SXX-@-@@  qDWX[X`X`XkXkXvXvWw  xQ  RU  RZ  RZ  Re  Re  Qf  fp  Cq  q@XX  ++FRWWTTT 2.+NN sFG
$C 99W88SZZQ_afhpqWW r   c           	        | j                   j                  j                         s't        d| j                   j	                          d      |rt        d      | j                   j                         sL|j                   j                         rt        d      |r%|j                   j                         rt        d      | j                   j                         rLt        || j                   j                         |      }|%t        || j                   j                         |      }| j                   j                  }|j                  }|t        j                  k(  r=t        j                  }t        j                  ||j                        }t        | ||      } t        |||      }|Jt        j                  |j!                  | j"                  |j"                  ||      t        j$                        S |j                   j                  j'                         st        d      t        j                  |j)                  | j"                  |j"                  |j"                  ||      t        j$                        S )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  z"Mask must have boolean scalar type)rj   ry   rr   r"   r   r   rs  r   r  r#   r`   r  r  r  r}   r$   create_storer   r  r  create_masked_store)	r  r>  r  r  r  r  r)   r  r  s	            r   _store_legacyrC    s   88??!!#01B1B1D0E^TUU  A B 	B
 8888effDII&&(dee xx"3(A(A(CWM'chh.G.G.I7SDXX__FF )=)=>3( sFG
$C |yy--cjj#**eXVXZX_X_``99##%=>>99W00SZZV[]efhjhohoppr   c           	        t        |      }t        |      }| j                  j                         s$| j                  j                  j                         rt        d      | j                  j                         r5| j                  j                  j                         rt        | ||||||      S t        | ||||||      S )N"Cannot store to a constant pointer)r  r  rj   is_constry   r"   rr   r  r   r?  rC  )	r  r>  r  r  r  r  r)   r  r  s	            r   storerG  C  s     )8E&7H
xxchhoo668=>>
xxSXX0099;#CdNE8U\]] S#t^UHgVVr   c           	     B   t        |      }t        |      }| j                  j                  j                  }|j
                  dvrt        d      t        j                  |j                  | j                  |j                  |j                  ||      |j                        S )N)   r2  r   z9atomic_cas only supports elements with width {16, 32, 64})r  r  rj   ry   r  r  r"   r#   r$   create_atomic_casr   )r  cmpr>  r  r  r)   r  s          r   
atomic_casrL  Y  s|    
c
C% E++J$$L8TUU99W..szz3::szzSVX]^`c`h`hiir   c                   | j                   j                  j                         s&t        d| j                   j	                         z         | j                   j                         s$| j                   j                  j                         rt        d      | j                   j                  j                  }|t        j                  u r|dk7  rt        d|z   dz         |t        j                  t        j                  t        j                  t        j                  fv rt        d|z   dz   t        |      z         | j                   j                         rN|%t        || j                   j!                         |      }|%t        || j                   j!                         |      }t#        || j                   j                  j                  |      }||j%                  d      }t        j                  }| j                   j                         rf|j'                  || j                   j!                               }t        j(                  t        j                  | j                   j!                               }t        j*                  ||      }| ||fS )Nz)Pointer argument of store instruction is rE  r   atomic_z does not support fp16z does not support T)rj   ry   rr   r"   r   rF  r  r#   rA   r`   r  int16rB   rw  r   rs  r   r}   r_   rC  r   r$   )r  r>  r  opr)   r  mask_irmask_tys           r   atom_red_typechecking_implrS  b  s   88??!!#DsxxGXGXGZZ[[
xxchh11::<=>>++JRZZB%KR*BBCCbggrww"++>>R*>>ZPQQ
xx'chh.G.G.I7SD?&sCHH,E,E,GQC
sCHHOO..
8C|""4(''88**7CHH4M4M4OPGmmBGGSXX-F-F-HIGyy'*T>r   c                   t        | ||d|      \  } }}t        |      }t        |      }|j                  j                  }|j                         r|j                         rjt        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S |t        j                  t        j                   hvrt#        d|       t%        g d||      }|t        j                  k(  rt        j&                  nt        j(                  }t+        |||      }	t+        | t        j,                  |d      |      }
|t        j                  k(  rt        j.                  nt        j0                  }t+        |||      }t+        | t        j,                  |d      |      }t3        |||      }t5        |||      }t        j                  |j                  t        j                  j                  |
j                  |	j                  t7        |||      j                  ||      |	j                        }t        j                  |j                  t        j                  j8                  |j                  |j                  t7        |||      j                  ||      |j                        }t;        ||||      }t+        |||      S )Nr   z#atomic_max not supported for dtype r\   r   )rS  r  r  rj   ry   rJ   r   r#   r$   create_atomic_rmwr
   	ATOMIC_OPMAXr   UMAXrC   rE   r4   re   r&   rc   r   r  rb   rd   r   r#  r   UMINwherer  r>  r  r  r  r)   sca_tyr  i_typei_vali_ptrui_typeui_valui_ptrposnegpos_retneg_retr   s                      r   
atomic_maxrg  }     /S$wONCd
c
C% EXX__F}}!99))",,*:*:CJJ

TXT_T_adfklnqnvnvx x 99))",,*;*;SZZUYU`U`beglmorowowy y
 bjj"**--=fXFGGC)D2::-RXX288FC)EC3W=E!RZZ/biiRYYGS'7+FS"//'15w?F
T7
+C
Cw
'Cii!!",,"2"2ELL%,,"&tS'":"A"A3	OPUPZPZ\G ii!!",,"3"3V]]FMM"&tS'":"A"A3	OPVP[P[]G Wgw
/C3((r   c                   t        | ||d|      \  } }}t        |      }t        |      }|j                  j                  }|j                         r|j                         rjt        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S |t        j                  t        j                   hvrt#        d|       t%        g d||      }|t        j                  k(  rt        j&                  nt        j(                  }t+        |||      }	t+        | t        j,                  |d      |      }
|t        j                  k(  rt        j.                  nt        j0                  }t+        |||      }t+        | t        j,                  |d      |      }t3        |||      }t5        |||      }t        j                  |j                  t        j                  j                  |
j                  |	j                  t7        |||      j                  ||      |	j                        }t        j                  |j                  t        j                  j8                  |j                  |j                  t7        |||      j                  ||      |j                        }t;        ||||      }t+        |||      S )Nr   z#atomic_min not supported for dtype r\   r   )rS  r  r  rj   ry   rJ   r   r#   r$   rU  r
   rV  MINr   rY  rC   rE   r4   re   r&   rc   r   r  rb   rd   r   r#  r   rX  rZ  r[  s                      r   
atomic_minrk    rh  r   c           
        t        | ||d|      \  } }}t        |      }t        |      }|j                  j                  }|j                         rt        j                  j                  nt        j                  j                  }t        j                  |j                  || j                  |j                  |j                  ||      |j                        S )Nr   )rS  r  r  rj   ry   rs   r
   rV  FADDADDr#   r$   rU  r   )r  r>  r  r  r  r)   r\  rP  s           r   
atomic_addro    s    /S$wONCd
c
C% EXX__F$002		8H8HB99W..r3::szz4;;X[]bcehememnnr   c           
     (   t        | ||d|      \  } }}t        |      }t        |      }t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S )Nand)rS  r  r  r#   r$   rU  r
   rV  ANDr   rj   r  r>  r  r  r  r)   s         r   
atomic_andrt    x    /S$wONCd
c
C% E99W..r||/?/?SZZY]YdYdfikpqXX r   c           
     (   t        | ||d|      \  } }}t        |      }t        |      }t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S )Nor)rS  r  r  r#   r$   rU  r
   rV  ORr   rj   rs  s         r   	atomic_orry    sv    /S$gNNCd
c
C% E99W..r||

CJJX\XcXcehjopXX r   c           
     (   t        | ||d|      \  } }}t        |      }t        |      }t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S )Nxor)rS  r  r  r#   r$   rU  r
   rV  XORr   rj   rs  s         r   
atomic_xorr}    ru  r   c           
     (   t        | ||d|      \  } }}t        |      }t        |      }t        j                  |j                  t        j                  j                  | j                  |j                  |j                  ||      |j                        S )Nxchg)rS  r  r  r#   r$   rU  r
   rV  XCHGr   rj   rs  s         r   atomic_xchgr    sx    /S$PNCd
c
C% E99W..r||/@/@#**cjjZ^ZeZegjlqrXX r   c                    | j                         |j                  j                  v s!J d|j                  j                   d|         | j                         } | dk(  rd} t	        t
        j                  |       S )Nzinput_precision must be one of . Got TF32X3TF32x3)lowerr   allowed_dot_input_precisionsupperr=  r
   INPUT_PRECISION)input_precisionr)   s     r   _str_to_dot_input_precisionr    sx      "goo&R&RR p
)'//*V*V)WW]^m]nopR%++-O(""2%%77r   c           
        | j                   j                         r|j                   j                         sJ | j                  j                         r|j                  j                         rn| j                  t        j
                  t        j                  t        j                  t        j                  t        j                  fv sJ d| j                          |j                  t        j
                  t        j                  t        j                  t        j                  t        j                  fv sJ d|j                          | j                  |j                  k(  s!J d| j                   d|j                          | j                  j                         s|j                  j                         r6t        | t        j                  |      } t        |t        j                  |      }||j                  j                  }t        ||      }t        | j                         }t        |j                         }||cxk(  rdk(  s1n ||cxk(  rdk(  s$n J d| j                    d|j                    d	       | j                   d
   j"                  |j                   d   j"                  k(  sVJ d| j                    d|j                    d| j                   d
   j"                   d|j                   d   j"                   d		       |j$                  j'                  d      J d        |j$                  d   | j                   |j                         }	| j                   d   j"                  |	d   k\  r>| j                   d
   j"                  |	d   k\  r|j                   d
   j"                  |	d   k\  sJ d|	d    d|	d    d|	d           | j                   j(                  j+                         rP| j                   j(                  t        j
                  k(  sJ d       |j-                  d      }
t        j.                  }n|j1                         rt3        d      | j                   j(                  j5                         s$| j                   j(                  j1                         r"|j7                  d      }
t        j                  }n4|j9                         r|j;                  d      n|j7                  d      }
|}| j                   j                   d   }|j                   j                   d
   }| j                   j                   d
   }|dk(  r| j                   j                   d   nd }t	        j<                  ||r|||gn||g      }||j?                  |
|r|||gn||g      }n|j@                  }|j                   |k(  sJ |N| j                  j                         r1|j                  j                         r|j                  jB                  }nNd}nK| j                  j                         r1|j                  j                         r||kD  rt3        d| d| d	      t	        jD                  |jG                  | j@                  |j@                  |||      |      S )NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   r	      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r
  rT  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ()$rj   r   r1   rI   r#   r  uint8rA   rB   rC   r  r}   r   default_dot_input_precisionr  rB  r  r@   r  r  ry   rJ   	get_int32r&   rH   r"   rF   get_fp32rG   get_fp16r   rC  r   max_num_imprecise_acc_defaultr$   
create_dot)r   r   accr  max_num_imprecise_acc	out_dtyper)   lhs_rankrhs_rankr  r  ret_scalar_tyMNKBr   
acc_handles                     r   dotr    s   88388#4#4#666
yycii..0yyRWWbhh

BKKZZ) ) 	O,B399+*N	O )yyRWWbhh

BKKZZ) ) 	O,B399+*N	O )yyCII%k)OPSPYPY{Z_`c`i`i_j'kk%
yy#))"7"7"93

G,3

G,!//EE1/7KO399~H399~Hx$1$H(A(A  REpqtqzqzp{  |E  FI  FO  FO  EP  PQ  DR  RA99R=#))
#E q(3LSYYK  XU  VY  V_  V_  `b  Vc  Vi  Vi  Uj  jZ  [^  [d  [d  eg  [h  [n  [n  Zo  op  qq "">2>t@tt>67&&~6sxxJL99R=,q/1ciim6I6I\Z[_6\IIbM<?2r,\!_,=W\RS_DUU_`lmn`o_pqr 3 xxxx"'')A+AA)q!				vx 	x		 	 	"chhoo&=&=&?a 

$-$5$5$7Wa W=M=Ma=P!rArArA%]qA]]=q1a)q!fEF
{))"1q!Qi1a&I
ZZ
xx6!!! $99#))"2"2"4$+OO$Q$Q!$%!99#))"2"2"49NQR9R67L6MM]^_]``abcc99W''

CJJ
O]rs r   c                z    t        t        j                  | j                         d       }|t	        d|  d      |S )NzInvalid float format: rV   )r=  r
   ScaleDotElemTypeTYr  r"   )float_formatty_enums     r   _str_to_fp_typer  L  s>    b++\-?-?-A4HG1,qABBNr   c                2   t         j                  t         j                  t         j                  t         j                  dj                  |      }|B|dk(  s
J d|        | j                  t         j                  k(  sJ d| j                          | S | j                  |k(  r| S t         j                  t         j                  t         j                  t         j                  d|   }| j                  |k(  sJ d| d| j                          t        | ||      S )z
    If float_format is subbyte, make sure it's packed as uint8 and return it.
    Otherwise, return a tensor (perhaps bitcasting) of the specified float format.
    )e5m2e4m3bf16fp16e2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for r  )
r#   float8e5
float8e4nvrB   rA   r  r1   r  uint16r   )r>  r  r)   	triton_tyunsigned_tys        r   _bitcast_to_fp_typer  S  s    
 bmmR[[Z\ZdZdeiijvwIv%a)RS_R`'aa%yyBHH$](QRUR[R[Q\&]]$

yyI
!xx299VXV_V_`amnyyK'`+@fUXU^U^T_)``'sIw//r   c
                   | j                   j                         r|j                   j                         sJ t        | j                        }
t        |j                        }|
|cxk(  rdk(  s1n |
|cxk(  rdk(  s$n J d| j                   d|j                   d       |j                  }|j                  }t        |      }t        |      }h d}||v s
J d|        ||v s
J d|        |d u xs* t        |t        j                        xr |j                  d u }|d u xs* t        |t        j                        xr |j                  d u }t        | ||	      } t        |||	      }| j                   j                  d	   }|j                   j                  d	d  \  }}|d
k(  rdnd}|d
k(  rdnd}||z  || j                   j                  d   z  k(  s"J d| j                   d|j                   d       |
dk(  r| j                   j                  d   nd }t        j                  ||r|||gn||g      }|	j                  d      }||	j                  ||r|||gn||g      }n|j                  }|j                   |k(  sJ |rd n|j                  }|rd n|j                  }t        j                  |	j                  | j                  |||j                  ||||      |      S )Nr	   r  r  r  r
  >   r  r  r  r  r  zNYI: lhs_format zNYI: rhs_format r  r  r   rT  zCReduction dimension should pack the same number of elements; (lhs: r   )rj   r   rB  r  r@   r  r]   r#   rh   r  r   r  rC  r   r$   create_dot_scaled)r   	lhs_scale
lhs_formatr   	rhs_scale
rhs_formatr  	fast_mathr  r)   r  r  lhs_format_enumrhs_format_enumallowed_formatsrhs_scale_is_nonelhs_scale_is_noner  r  r  PACKED_APACKED_Br  r   r  r  rhs_scale_handlelhs_scale_handles                               r   
dot_scaledr  e  s    88388#4#4#666399~H399~Hx$1$H(A(A  REpqtqzqzp{  |E  FI  FO  FO  EP  PQ  DR  RA &&J &&J%j1O%j1O>O(I,<ZL*II((I,<ZL*II(!T)njBLL.Q.mV_VeVeimVm!T)njBLL.Q.mV_VeVeimVm
c:w
7C
c:w
7CrA88>>"#DAq&(qaH&(qaHx<8chhnn
'   tRSVS\S\R]]fgjgpgpfqqrst  &]qA]]91q!Qi1a&AF			!	B
{))"1q!Qi1a&I
ZZ
xx6!!!0ti6F6F0ti6F6F99!!#**.>QTQ[Q[]m"19j	JKQS Sr   c                   | j                   t        j                  k7  r"t        j                  d| j                           t        | t        j                  |      } t        |||dd      \  }}| j                  j                         r!t        | ||      \  } }t        |||      \  }}nt        | ||      \  } }|j                  }t        j                  |j                  | j                  |j                  |j                        |      S )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r1   r#   r`   warningswarnr}   r   rj   r   r~   r$   create_selectr   )	conditionrk   r   r)   r|  r   s         r   rZ  rZ    s    "''!uv  wF  wF  vG  H	
 Y1I'1gtTBDAq~~ +Iq'B	1#Aq'21+Iq'B	1VVF99W**9+;+;QXXqxxPRXYYr   c                d    |rt        j                  ||      }n|}t        j                  | |      S rq   )r#   r   r$   )rk   rN   r}  res_tys       r   wrap_tensorr    s-    y)4 99Qr   c                   	
 |t        fd D               d} d   j                  j                  
t        
      }||k  sJ d| d       t	        
      D cg c]  \  }}||k7  s| c}}	t        
fd D              sJ d       j                   D cg c]  }|j                   c}|       |       j                          t         	fdt        t                     D              S c c}}w c c}w )Nc              3  f   K   | ](  }t        ||j                  j                  gd        * yw)TrU  N)rI  r<  r@   )rd  tr)   s     r   rf  zreduction.<locals>.<genexpr>  s*     fZ[wq177==/tWUUfs   .1r   z&reduction axis must be < inputs rank (r
  c              3  P   K   | ]  }|j                   j                  k(    y wrq   )rj   r  )rd  r  r  s     r   rf  zreduction.<locals>.<genexpr>  s     5qvv||u$5s   #&z-all reduction inputs must have the same shapec              3     K   | ]7  }t        j                  |      |   j                  j                         9 y wrq   r  
get_resultrj   ry   )rd  rq  inputs	reduce_opr}  s     r   rf  zreduction.<locals>.<genexpr>  s4     t\]Y11!4fQinn6K6KYWt   =A )
tuplerj   r  rB  rn  allcreate_reducer   verifyr8  )r  r(   region_builder_fnr)   rankrq  rH  r  r  r}  r  s   `  `    @@@r   	reductionr    s    |f_eff1INN  Eu:D$;H@aHH;(/=tq!19=I5f55f7ff5%%&@Aqxx&@$GIi tafgjkqgrasttt > 'As    C;.C; Dc                     d   j                   j                  t              }| |cxk  r|k  sn J d| d| d       |dk  r||z  } D ]"  }|j                   j                  k(  rJ d        |j                   D cg c]  }|j                   c}||       |       j                          t         fdt        t                     D              S c c}w )Nr   z
scan axis z must be < inputs rank (r
  z(all scan inputs must have the same shapec              3     K   | ]7  }t        j                  |      |   j                  j                         9 y wrq   r  )rd  rq  r  scan_opr  s     r   rf  z#associative_scan.<locals>.<genexpr>  s4     nVWW//2F1INN4I4I5Qnr  )rj   r  rB  create_scanr   r  r  r8  )	r  r(   r  reverser)   r  r  r  r  s	   `      @@r   associative_scanr    s    1INN  Eu:D5D4S:dV3KD6QR!SSax Qvv||u$P&PP$Q !!V"<188"<dGLGgNNn[`adekal[mnnn	 #=s   C c                   |j                   j                         sJ d       t        | j                  j                        }t        |j                  j                        |k(  sJ d       | |cxk  r|k  sn J d| d| d       |dk  r||z  }t        |      D ]F  }||k(  r	|j                  j                  |   | j                  j                  |   k(  r=J d| d        |j                  | j                  |j                  |      }t        || j                  j                  |j                  j                        S )	Nzindex must be an integer tensorz0source and index tensors must have the same rankzgather axis z must be < source rank (r
  r   z
index dim z( must match the corresponding source dim)
r1   rJ   rB  rj   r  r8  create_gatherr   r  ry   )srcindexr(   r)   r  re  gathers          r   r  r    s%   ;;B!BBsxx~~Duzz D(\*\\(5D4U<v5MdVST!UUax4[ u9zz"chhnnQ&77t:dVKs9tt7u
 ""3::u||TBFvsxx

0@0@AAr   c                (   t        | j                        dk(  sJ d       | j                  j                         sJ d       t	        j
                  |j                  | j                  |      t	        j                  t        j                  |g            S )Nr   z histogram only supports 1D inputz%histogram only supports integer input)
rB  r  r1   rJ   r#   r$   create_histogramr   r   r&   )r   num_binsr)   s      r   	histogramr    ss    u{{q D"DD ;;H!HH99W--ellHEr}}UWU]U]`h_iGjkkr   c                   t        dt        | j                              t        |      k7  rt        d      | j                  j                  dt        j                  || j                  j                                      | S )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   rB  r  r"   r   set_attrr
   	make_attrget_contextrk   ru  s     r   multiple_ofr    s[    
1c!''ls6{*\]]HH'fahh>R>R>T)UVHr   c                    t        | j                        t        |      k7  rt        d      | j                  j	                  dt        j                  || j                  j                                      | S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityrB  r  r"   r   r  r
   r  r  r  s     r   max_contiguousr  	  sS    
177|s6{"_``HHor||FAHH<P<P<R'STHr   c                    t        | j                        t        |      k7  rt        d      | j                  j	                  dt        j                  || j                  j                                      | S )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  s     r   max_constancyr    sS    
177|s6{"^__HHnbll6188;O;O;Q&RSHr   c                f    t        j                  | j                         t         j                        S rq   )r#   r$   create_barrierr  )r)   s    r   debug_barrierr    s     99W++-rww77r   c           	     .   | j                  d      s|r| dz  } | j                  d      s
|r| d d dz   } t        |       dkD  r| j                  d      sd| z   } |D cg c]  }|j                   }}|D cg c][  }|j                  t
        j                  t
        j                  t
        j                  t
        j                  t
        j                  fv ] }}t        j                  |j                  | |||      t
        j                        S c c}w c c}w )N rm  rT  r	   )endswithrB  
startswithr   r1   r#   r`   r  rO  r&   rc   r$   create_printr  )prefixargshexr)   argnew_args	is_signeds          r   device_printr    s     ??3D#??4 Tt#
6{Qv005v&*+s

+H+Z^_SVrww288RXXNN_I_99W))&#xKRWWUU ,_s   D5A Dc                    |j                   j                  sy t        j                  |j	                  | j
                  |      t        j                        S rq   )r   debugr#   r$   create_assertr   r  )r   r   r)   s      r   r   r   *  s8    ??  99W**4;;<bggFFr   c                |    t        j                  |j                  | j                        t         j                        S rq   )r#   r$   create_assumer   r  )r   r)   s     r   assumer  0  s&    99W**4;;7AAr   c                   t        |t              rt        j                  |      }t        |t        j                        r|rGd|j                  cxk  rdk  sn J d|j                   d       | j                  |j                        S d|j                  cxk  rdk  sn J d|j                   d       | j                  |j                        S t        |t        j                        r|j                  j                  dk(  sJ d	       |j                  j                         sJ d
       |j                  t        j                  k7  rE|rC| j                  |j                  | j                         |j                  j                               S |j                  t        j                   k7  r	|sJ d       |j                  S J dt#        |              )NrT   rU   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the rangerR   rS   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetszzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )r]   ra   r#   rh   r@   r   r  r$   r<  r1   rJ   rc   r   r   get_int64_tyr   r&   rj   )r)   r  r  s      r   _convert_elem_to_ir_valuer  4  s   $||D!$%TZZ/%/ F 4#zzl*D2F F/$$TZZ00TZZ/%/ F 4#zzl*D2F F/$$TZZ00	D"))	$zz1$R&RR$zz  "^$^^"::!k**4;;8L8L8NPTPZPZPhPhPjkkZZ288#KS S S5{{TGT
|TT5r   c                v    t        |d      r|D cg c]  }t        | ||       c}S t        | ||      gS c c}w )Nr  )r  r  )r)   	list_liker  r  s       r   r  r  L  s?    y*%R[\$)'4E\\%gy+FGG ]s   6c           	        t        ||      }t        ||      }t        ||d      }| j                  j                         r$| j                  j                  j	                         rt        d      | j                  j                  t        j                  k(  rCt        | t        j                  t        j                  | j                  j                        |      } t        d      sgD cg c]*  }t        |t        j                        r|j                  n|, c}t!        d D              sJ d       t        |d      s|g}|D cg c]*  }t        |t        j                        r|j                  n|, }}t#        |      t%        t'        t)        |                  k(  sJ d       t!        fd||||fD              sJ d	       |j+                  | j,                  ||||      }t        j.                  |t        j                  t        j0                  | j                  j                                    S c c}w c c}w )
NFr  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c              3  `   K   | ]&  }t        |t              xr d |cxk  xr dk  nc  ( yw)rR   rS   N)r]   ra   )rd  r  s     r   rf  z!make_block_ptr.<locals>.<genexpr>e  s)     XDz$$?4)?%)??Xs   ,.zGExpected a list of constant integers (`int32_t` range) in `block_shape`z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc              3  L   K   | ]  }t              t        |      k(    y wrq   )rB  )rd  r  r  s     r   rf  z!make_block_ptr.<locals>.<genexpr>o  s     dis;3y>1ds   !$zBExpected shape/strides/offsets/block_shape to have the same length)r  rj   rr   r  r   r"   r#   r`   r}   r  r  r  r  r]   rh   r@   r  rg  rh  r8  rB  create_make_block_ptrr   r$   r   )	baser  stridesr  r  orderr)   r  r   s	       `    r   make_block_ptrr  R  s    "'51E#GW5G#GW%HG 99!5!5!>!>!@hii yyrww&D"//"''4993J3JKWU ;
+"mVabdD",,!?4::TIbKXKXX RQRX 5*%PUV:dBLL9TZZtCVEV%=Ds5z!233s5ss3 dE7T[]bCcdd MLMd **4;;wQ\^cdF99VR__R]]499;O;OQ\-]^__% c Ws   ./H=/Ic                    t        ||d      }t        j                  |j                  | j                  |      | j
                        S )NFr  )r  r#   r$   create_advancer   rj   )r  r  r)   s      r   advancer  y  s8    #GW%HG 99W++DKKA499MMr   c                   t        |      }d|cxk  rdk  sn t        d| d      t        |      |k7  rt        d| dt        |             t        |      |k7  rt        d| dt        |             t        j                  |d	         |d	<   |d	   d
k7  rt        d|d	          |D cg c]  }t	        ||       }}|D cg c]-  }t	        ||      j                  t        j                  |      / }}t        j                  |      }t        | j                  t        j                        sJ t        j                  | j                  j                  |      }|j                  | j                  |D cg c]  }|j                   c}|D cg c]  }|j                   c}|      }	t        j                  |	|||      S c c}w c c}w c c}w c c}w )Nr	      z Expected 2 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got rT  r   z-Tensor descriptor last dim must be 1 but got r  )rB  r"   r#   rK  ri   torc   _unwrap_shaper]   rj   r  r   r  create_make_tensor_descriptorr   _experimental_tensor_descriptor)
r  r  r  r  r)   r  rk   rj   rH  r   s
             r   make_tensor_descriptorr%    s    u:DNN;D6MNN
7|t9TF*;CL>JKK
;48>RSVW^S_R`abb((5GBKr{aHQSVWW,12qYq'"2E2MTUyG$''7'CUGU "";/Kdii111==--{;D224;;SX@Ya@Ynu[vij\]\d\d[v3>@F--feWdKK 3U AZ[vs   0G	2G<G
G)r(   ra   r)   
ir.builderreturn	tl.tensor)r5   tl.dtyper6   r)  r'  r)  )r5   r)  rK   r^   r6   r)  rL   r^   rM   r^   r'  r)  )T)rl   r^   )r   r)  r   r)  rt   r^   r'  None)FFTF)r   tl.tensor | numbers.Numberr   r+  r)   r&  r'  Tuple[tl.tensor, tl.tensor])r   r(  r   r(  r)   r&  r   callable)
r   r+  r   r+  r   r^   r)   r&  r'  r(  )r   r+  r   r+  r)   r&  r'  r(  )
r   r+  r   r+  r   r^   r)   r&  r'  r(  )rk   r(  r   r(  r   tl.PropagateNanr)   r&  )
rk   r(  r   r(  r   r(  r   r.  r)   r&  )r   r(  r   r(  r)   r&  r'  r,  )r   r(  r   r(  r)   r&  r'  r(  )r   r(  r)   r&  )r   r(  r'  r(  )r   r(  r)   r&  r'  r(  )r   r(  r)   r(  r'  r(  )r  r(  r'  tl.block_type)r4  ra   r5  ra   r)   r&  r'  r(  )r  	List[int]r1   r)  r)   r&  r'  r(  )r@   r(  r  r0  r)   r&  r'  r(  )
r   r(  rF  r0  rG  r^   r)   r&  r'  r(  )r   r(  r(   ra   r)   r&  r'  r(  )
r   r(  r   r(  rG  r^   r)   r&  r'  r(  )rW  r(  rX  r(  r)   r&  r'  r(  )rW  r(  r)   r&  r'  r,  )r   r(  rj  z
Tuple[int]r)   r&  r'  r(  )r   r(  r  r0  r)   r&  r'  r(  )r   r(  r   r(  r)   r&  r'  r(  )r  Optional[str])r   r(  r  r)  r)   r&  r'  r(  rq   )
r   r(  r  r)  r)   r&  r  r1  r'  r(  )r  r(  r  Optional[tl.tensor]r   r2  r  r   r  rw  r  rw  r  rw  r  r^   r)   r&  r'  r(  )r	  r(  r
  r/  r)   r&  )
r  z&tl._experimental_tensor_desciptor_baser  rw  r  rw  r)   r&  r'  r(  )r  z'tl._experimental_tensor_descriptor_baser@   r(  r)   r&  r'  r(  )r  rw  r  rw  r)   r&  r'  r(  )r@   r(  r)   r&  r'  r(  )r	  r(  r/  r(  r0  List[tl.tensor]r1  r3  r2  r3  r3  r3  r4  ra   r5  ra   r6  ra   r7  ra   r)   r&  r'  r(  )r	  r(  r)   r&  r'  r(  )r  r(  r>  r(  r  r2  r  rw  r  rw  r)   r&  r'  r(  )r  r(  rK  r(  r>  r(  r  rw  r  rw  r)   r&  r'  r(  )r  r(  r>  r(  r  r(  rP  rw  r)   r&  r'  z&Tuple[tl.tensor, tl.tensor, tl.tensor])r  r(  r>  r(  r  r(  r  rw  r  rw  r)   r&  r'  r(  )r   r(  r   r(  r  r(  r  r1  r  ra   r  r)  r)   r&  r'  r(  )r  rw  )r>  r(  r  rw  r)   r&  )r   r(  r  r(  r  rw  r   r(  r  r2  r  rw  r  ztl.tensor | Noner  r^   r  r)  r)   r&  r'  r(  )
r  r(  rk   r(  r   r(  r)   r&  r'  r(  )r  Sequence[tl.tensor]r(   ra   r)   r&  r'  Tuple[tl.tensor, ...])
r  r4  r(   ra   r  r^   r)   r&  r'  r5  )
r  r(  r  r(  r(   ra   r)   r&  r'  r(  )r   r(  r  ra   r)   r&  r'  r(  )rk   r(  ru  r0  r'  r(  )r)   r&  r'  r(  )
r  rw  r  r3  r  r^   r)   r&  r'  r(  )r   r(  r   rw  r)   r&  r'  r(  )r  r(  r)   r&  r'  r(  )r  r(  r  r3  r  r3  r  zList[tl.constexpr]r)   r&  r'  z"tl._experimental_tensor_descriptor){
__future__r   r  typingr   r   r   r   r   rw   _C.libtritonr
   r   r   r#   r   	Exceptionr   r*   r-   r;   rP   ri   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r   r   r  r  r   r#  r   r+  r0  r9  re   r?  rI  rN  rR  r\  ra  rk  rs  r~   r  r   r}   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r(  r,  r8  r;  r?  rC  rG  rL  rS  rg  rk  ro  rt  ry  r}  r  r  r  r  r  r  rZ  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r%  rW   r   r   <module>r:     s   "  ; ;   CLF	 FDF@ 0,&*0,/70,f#V	< ]a,1#6Q#L&& : : ) :F44 )4$44 )4R4:&&!*&425$5$	]"+5:UQ
P
Q
'&"R
R
Q)$)4444	4	4"D"(.H[MK2
IL$2tpZ( 04h5,h58Ah5` 		 
w,;|nn.1nDGnVZnn!*n E
{
)'
),5
)	c(	c-6	c).74$,   	
 # $      >\:)qXWW)3W8AW,j(27]6$)N$)No#(18FF&0F5>FR0$(S(S%5(SBF(SS[(S"(S'0(S`Z, u.o(o-Bo2B0l8VGBU0H$`NNL
LL L $	L
 L (Lr   