
    nVh-                     8    d Z ddlZ G d d      Z G d d      Zy)aN  
Helper classes for working with low precision floating point types that
align with the opencompute (OCP) microscaling (MX) specification.
  * MXFP4Tensor: 4-bit E2M1 floating point data
  * MXScaleTensor: 8-bit E8M0 floating point data
Reference: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
    Nc                   2    e Zd ZddZd Zd Zd Zd Zd Zy)	MXFP4TensorNc                    || _         |It        |t        j                        sJ d       |j                   | _         | j	                  |      | _        y|!t        |t              r|| _        y|f| _        yt        d      )at  
        Tensor class for working with four bit E2M1 floating point data as defined by the
        opencompute microscaling specification.


        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp4e2m1 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        N%Parameter data must be a torch tensor.Either parameter data or size must be provided	device
isinstancetorchTensor_from_floatdatatuplesize
ValueErrorselfr   r   r	   s       A/home/dcms/DCMS/lib/python3.12/site-packages/triton/tools/mxfp.py__init__zMXFP4Tensor.__init__   sr     dELL1Z3ZZ1++DK((.DI *4 7DIdXDIMNN    c                    t        j                  dd| j                  t         j                  | j                        }t        j                  dd| j                  t         j                  | j                        }t        j                  dd| j                  t         j                  | j                        }|dz  |dz  z  |z  j                  t         j                        | _        | S )Nr      r   dtyper	            )r   randintr   uint8r	   typer   )r   SEMs       r   randomzMXFP4Tensor.random#   s    MM!QTYYekk$++VMM!QTYYekk$++VMM!QTYYekk$++V1fa(1,225;;?	r   c                    |t         j                  k(  sJ d       | j                  }|dz	  dz  j                  |      }|dz	  dz  j                  |      }|dz  j                  |      }t        j                  |      }|dk(  |dk(  z  }| }|j                         r||   }	||   }
||   }t        j                  d|	      }t        j                  |
dk(  |
|
dz
        }t        j                  |
dk(  |dz  d|dz  z         }|t        j                  d|      z  |z  }|||<   |||dk(  z  xx   dz  cc<   |j                  t         j                        S )	z
        Convert fp4e2m1 data to float32.

        Returns:
        - A torch tensor of type dtype representing the fp4e2m1 data.
        zCCurrently only float32 is supported for fp4e2m1 to float conversionr   r   r         ?      ?r   )r   float32r   r    
zeros_likeanypowwhere)r   r   r   r!   r"   r#   valueis_zeronon_zero_maskS_nzE_nzM_nzsignexponentmantissavalue_nzs                   r   tozMXFP4Tensor.to+   s_    %l'll%yyai3$$U+ai3$$U+CZe$   #6a1f% ]#D]#D]#D99R&D{{419dD1H=H{{419dSj#s
:JKHeii844x?H#+E-  	ga !R'!zz%--((r   c                 r   t        j                  |      j                  t         j                        }t        j                  |      }|dk(  }t        j
                  |      t        j                  |      z  }t        j                  g dt         j                  | j                        }t        j                  ddgt         j                  | j                        }g }g }	g }
|D ]  }|dk(  rJd}|D ]B  }|dz  }|d|z  z  }|j                  |       |	j                  |       |
j                  |       D R|j                         dz
  }|D ]E  }d|dz  z   }|d|z  z  }|j                  |       |	j                  |       |
j                  |       G  t        j                  |t         j                  | j                        }t        j                  |	t         j                  | j                        }	t        j                  |
t         j                  | j                        }
|j                  d      }|j                  d   }|j                  d      }|j                         j                         }|||j                  d      <   t        j                  ||j                  d      z
        }t        j                   |dd	
      \  }}||k(  }|j#                         dkD  rK|
j                  d      j%                  |d      }|dk(  j                  t         j&                        }||dz  z
  }t        j(                  |d      }|	|   }|
|   }|j                  |j                        }|j                  |j                        }d||<   d||<   |dz  |dz  z  |z  j                  t         j                        S )a5  
        Convert float32 numbers to mxf4 e2m1 format.
        * No encodings are reserved for Inf or NaN in mxf4.
        * Conversion from float supports roundTiesToEven rounding mode.
        * If a value exceeds the mxf4 representable range after rounding,
          clamps to the maximum mxf4 magnitude, preserving the sign.
        * If a value has magnitude less than the minimum subnormal magnitude
          in mxf4 after rounding, converts to zero.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to fp4 format.
        r   )r   r   r   r   r   r	   r   r'   r   r(   r&   T)dimkeepdimgư>r;   r   )r   signbitr    r   absisnanisinftensorr	   appenditemr)   viewshape	unsqueezemaxminsumexpandint32argmin)r   valuesr!   
abs_valuesr/   
is_invalidE_bitsM_bitscandidate_valuescandidate_Ecandidate_Mr"   r5   r#   significandr.   
candidatesabs_values_flatNabs_values_expandedmax_candidate_valueerrors
min_errors_is_tieM_bits_expandedtie_breakerbest_indices
E_selected
M_selecteds                                 r   r   zMXFP4Tensor._from_floatN   s7    MM&!&&u{{3YYv&
?[[(5;;v+>>

 l%++dkkRq!fEKKL 	*AAv *A"#c'K'1h;7E$++E2&&q)&&q)* 668a< *A"%C-K'1h;7E$++E2&&q)&&q)*	*( \\"2%--PTP[P[\
ll;ekk$++Vll;ekk$++V$//"-!!!$-77: )nn.335/B
+, .1E1Ea1HHI
 		&a>
AJ&::<!)33A6==aDO*a/55ekkBK{T12F||F2 .
 .
OOJ,,-OOJ,,-'
'
aAF#a'--ekk::r   c                 B   | j                   }d|cxk  r|j                  k  sJ d        J d       |j                  |      }|dz   dz  }|dz  dk7  r]dgd|j                  z  z  }|j                  |z
  dz
  dz  dz   }d||<   t        j                  j
                  j                  ||dd      }t        |j                        }|||<   |j                  |dz   d        |j                  | }|j                  |dz   d      }|j                  |dz   d      }	|	dz  |z  }
|
S )a  
        Packs two e2m1 elements into a single uint8 along the specified dimension.

        Parameters:
        - dim: The dimension along which to pack the elements.

        Returns:
        - A torch tensor of dtype uint8 with two e2m1 elements packed into one uint8.
        r   zHThe dimension to pack along is not within the range of tensor dimensionsr   r   constant)moder.   r   )r   ndimr   r   nn
functionalpadlistrF   insertreshapeselect)r   r;   r   size_along_dimnew_size_along_dim	pad_sizes	pad_index	new_shapelowhighpackeds              r   to_packed_tensorzMXFP4Tensor.to_packed_tensor   sD    yyC#$))# 	WV	W# 	WV	W# 3,q0Q6 A"q499}-IS1,1A5I#$Ii 88&&**4ST*UD$	+	#q!$t||Y'kk#'1%{{37A&!)s"r   c                    |dz	  dz  }|dz  }t        j                  ||f|dz         }t        |j                        }|d| ||   dz  gz   ||dz   d z   } |j                  | }	||   dz  dk7  r9t        d      g|	j                  z  }
t        d||         |
|<   |	t        |
         }	|	j                  t         j                        S )a  
        Unpacks a tensor where two fp4 elements are packed into a single uint8.

        Parameters:
        - packed_tensor: The packed tensor
        - dim: The dimension along which the tensor was packed.
        - original_shape: The shape of the original tensor before packing.

        Returns:
        - A tensor with the original data unpacked into uint8 elements containing one
          fp4e2m1 element in the least significant bits.
        r      r   r=   Nr   r   )
r   stackrl   rF   rn   slicerh   r   r    r   )r   packed_tensorr;   original_shaperv   ru   stackedrF   rt   r   indicess              r   unpack_packed_tensorz MXFP4Tensor.unpack_packed_tensor   s     "c)c!++sDksQw7 W]]#$3K5:>"22U378_D	w	* #"a'T{mdii/G N3$78GCLg'Dyy%%r   NNN)	__name__
__module____qualname__r   r$   r8   r   rx   r    r   r   r   r      s%    O*!)FV;p!F&r   r   c                   (    e Zd ZddZddZd Zd Zy)MXScaleTensorNc                    || _         |It        |t        j                        sJ d       |j                   | _         | j	                  |      | _        y|!t        |t              r|| _        y|f| _        yt        d      )a6  
        Tensor class for working with microscaling E8M0 block scale factors.

        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp8e8m0 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        Nr   r   r   r   s       r   r   zMXScaleTensor.__init__   sr     dELL1Z3ZZ1++DK((.DI *4 7DIdXDIMNNr   c                    d}|dn=t        dt        t        j                  t        j                  |                  |z         }|dnGt        dt        dt        t        j                  t        j                  |                  |z               }||k  sJ d       t        j                  ||dz   | j                  t        j                  | j                        }|| _
        | S )zp
        Generate random E8M0 data within a specified range.
        * Excludes the NaN encoding (255).
           r      z&Low must be less than or equal to highr   r   )rH   intr   log2rB   rI   r   r   r   r	   r   )r   ru   rv   biasmin_exponentmax_exponentr"   s          r   r$   zMXScaleTensor.random   s    
 KqSC

5<<PSCT8U4VY]4]-^"lsCQEJJu||\`OaDb@cfj@j9k0l|+U-UU+MM,q(8tyyPUP[P[dhdodop	r   c                    |t         j                  k(  sJ d       | j                  j                  |      }|dk(  }|j	                         }d||<   |dz
  }t        j
                  d|      }t         j                  ||<   |j                  |      S )NzBCurrently only float32 is supported for f8e8m0 to float conversion   r   r   g       @)r   r)   r   r    cloner,   nan)r   r   r   is_nane_biaseder.   s          r   r8   zMXScaleTensor.to  s    %k'kk%yy~~e$#+::<sN		#q!		fzz%  r   c                    t        j                  |t         j                  | j                        }t        j                  |      t        j
                  |      z  |dk  z  }d||<   ||    }t        j                  t        j                  |            }|dz   }|j                  t         j                        }t        j                  |dd      }|j                  t         j                        || <   |S )aO  
        Convert float32 numbers to E8M0 format.
        * Values <= 0, NaNs, and Infs are converted to the NaN encoding (255).
        * Positive values are converted by computing the floor of log2(value) to get the exponent.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to E8M0 format.
        r:   r   r   r   r   )r   
empty_liker   r	   r@   rA   floorr   r    rL   clamp)	r   rN   resultrP   valid_valuesr   r   e_biased_inte_biased_clampeds	            r   r   zMXScaleTensor._from_float  s     !!&DKKP[[(5;;v+>>&A+N
 zzk*KK

<01s7}}U[[1 ;;|Q<.33EKK@
{r   r   )NN)r   r   r   r   r$   r8   r   r   r   r   r   r      s    O&	!r   r   )__doc__r   r   r   r   r   r   <module>r      s(    Z& Z&zD Dr   