
    VhB5                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	m
Z
mZ d dlZd dlmZ ej                  dk(  ZdedefdZ G d d	      Ze j&                   G d
 de             Ze j&                   G d de             Ze j&                   G d de             Ze j&                   G d de             Ze j&                   G d de             Ze j&                   G d de             Ze j&                   G d de             Z G d de      Zdee   fdZ e       Z e        e        e        e        e       gZdeedf   dee   defdZ  ejB                  d      dee   fd       Z"defd Z#y)!    N)AnyCallableUnion)configwin32	isa_flagsreturnc                 d    ddl m}m}  | |             }t        j                  }| d|  d| }|S )Nr   )get_compiler_version_infoget_cpp_compiler=)torch._inductor.cpp_builderr   r   torch__version__)r   r   r   compiler_infotorch_versionfingerprints         K/home/dcms/DCMS/lib/python3.12/site-packages/torch/_inductor/cpu_vec_isa.py _get_isa_dry_compile_fingerprintr      s=     X-.>.@AM%%M"O1YKq@K    c                   &   e Zd ZU eed<   ee   ed<   eed<   eej                  ef   ed<   dZ
dZdefdZej                  fd	ej                  defd
Zdee   fdZdefdZdefdZdedefdZdefdZ ej,                  d      defd       Zy)VecISA
_bit_width_macro_arch_flags_dtype_nelementsa  
#if defined(CPU_CAPABILITY_AVX512) || defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_ZVECTOR) || defined(CPU_CAPABILITY_NEON) || defined(CPU_CAPABILITY_VSX) || defined(CPU_CAPABILITY_SVE)
#include <ATen/cpu/vec/functional.h>
#include <ATen/cpu/vec/vec.h>
#endif

alignas(64) float in_out_ptr0[16] = {0.0};

extern "C" void __avx_chk_kernel() {
    auto tmp0 = at::vec::Vectorized<float>(1);
    auto tmp1 = tmp0.exp();
    tmp1.store(in_out_ptr0);
}
zG
import torch
from ctypes import cdll
cdll.LoadLibrary("__lib_path__")
r	   c                     | j                   S N)r   selfs    r   	bit_widthzVecISA.bit_widthL   s    r   dtypec                      | j                   |   S r   )r   )r    r"   s     r   	nelementszVecISA.nelementsO   s    $$U++r   c                     | j                   S r   )r   r   s    r   build_macrozVecISA.build_macroR   s    {{r   c                     | j                   S r   )r   r   s    r   build_arch_flagszVecISA.build_arch_flagsU   s    r   c                 *    t        t        |             S r   )hashstrr   s    r   __hash__zVecISA.__hash__X   s    CIr   codec                 @   ddl m}m}m} ddlm}m}m}  ||dt        | j                              \  }}	ddl
m}
  |       } |
t        j                  j                  ||dz         |      }|5  t        j                  j                  |	      } || d	
      } |||	g||      }	  ||j!                               }t        j                  j#                  |      s|j%                          t'        j(                  t*        j,                  dt.        j0                  j3                  d|      g|t&        j4                  i t        j6                  ddj                  t*        j                        i       	 d d d        y# t8        $ r Y d d d        y	w xY w# 1 sw Y   y xY w)Nr   )get_lock_dirLOCK_TIMEOUTwrite)
CppBuilderCppTorchOptionsnormalize_path_separatorcpp)extra)FileLockz.lock)timeoutF)vec_isawarning_allz-c__lib_path__
PYTHONPATH:)cwdstderrenvT)torch._inductor.codecacher/   r0   r1   r   r2   r3   r4   r   r   torch.utils._filelockr7   ospathjoindirnameget_target_file_pathisfilebuild
subprocess
check_callsys
executabler   _avx_py_loadreplaceDEVNULLenviron	Exception)r    r-   r/   r0   r1   r2   r3   r4   key
input_pathr7   lock_dirlock
output_dirbuid_optionsx86_isa_help_builderoutput_paths                    r   check_buildzVecISA.check_build[   st   OO	
 	
  243C3CD
Z
 	3>XsW}=|T 	4J*4UKL#-	$ 6(==? ww~~k2(..0 %%++33NKP
 #%--H2::H|SXXchh5GH	 ?	 	8  ;	 	89	 	s1   06F'CE=3F=	FFFFFc                 T    | j                  t        j                  j                        S r   )_VecISA__bool__implr   r5   
vec_isa_okr   s    r   __bool__zVecISA.__bool__   s      !6!677r   Nc                 r    ||S t        j                         ry| j                  t        j                        S )NT)r   	is_fbcoder[   r   	_avx_code)r    r^   s     r   __bool__implzVecISA.__bool__impl   s4    ! 0 011r   )__name__
__module____qualname__int__annotations__listr+   dictr   r"   rb   rN   r!   floatr$   r&   r(   r,   boolr[   r_   	functools	lru_cacher]    r   r   r   r   "   s    OI5;;+,,"IL3  .3[[ ,u{{ ,S ,T#Y  #  # 0 0 0d8$ 8 Y2$ 2 2r   r   c                       e Zd ZU dZddgZdZej                  dej                  dej                  diZ
defdZej                  Zeegef   ed	<   y
)VecNEON   CPU_CAPABILITY_NEONAT_BUILD_ARM_VEC256_WITH_SLEEF       r	   c                 .    t        j                         ryyNneonasimdr   ra   r   s    r   __str__zVecNEON.__str__       r   r,   Nrd   re   rf   r   r   r   r   rk   bfloat16float16r   r+   r}   r   r,   r   r   rh   ro   r   r   rq   rq      s]    J#%EFFKQ5==!L 
 )/Hhx}%7r   rq   c                       e Zd ZU dZg dZdZej                  dej                  dej                  diZ
defdZej                  Zeegef   ed<   y	)
	VecSVE256   )CPU_CAPABILITY_SVECPU_CAPABILITY_SVE256rt   z(-march=armv8-a+sve -msve-vector-bits=256rw      r	   c                 .    t        j                         ryyry   r|   r   s    r   r}   zVecSVE256.__str__   r~   r   r,   Nr   ro   r   r   r   r      s]     JF
 =KQEMM2N 
 )/Hhx}%7r   r   c                       e Zd ZU dZdgZesdndZej                  dej                  dej                  diZdefdZej                  Zeegef   ed	<   y
)	VecAVX512i   CPU_CAPABILITY_AVX512z0-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfmaz/arch:AVX512r       r	   c                      y)Navx512ro   r   s    r   r}   zVecAVX512.__str__   s    r   r,   Nrd   re   rf   r   r   _IS_WINDOWSr   r   rk   r   r   r   r+   r}   r   r,   r   r   rh   ro   r   r   r   r      si    J%&F  	; 
 RU]]BO  )/Hhx}%7r   r   c                        e Zd ZU ej                  dz   Zdef fdZej                  Ze	ege
f   ed<   dZ ej                  d      def fd       Z xZS )VecAMXz! -mamx-tile -mamx-bf16 -mamx-int8r	   c                 &    t         |          dz   S )Nz	 amx_tile)superr}   r    	__class__s    r   r}   zVecAMX.__str__   s    w ;..r   r,   aS  
#include <cstdint>
#include <immintrin.h>

struct amx_tilecfg {
  uint8_t palette_id;
  uint8_t start_row;
  uint8_t reserved_0[14];
  uint16_t colsb[16];
  uint8_t rows[16];
};

extern "C" void __amx_chk_kernel() {
  amx_tilecfg cfg = {0};
  _tile_loadconfig(&cfg);
  _tile_zero(0);
  _tile_dpbf16ps(0, 1, 2);
  _tile_dpbusd(0, 1, 2);
}
Nc                     t         |          rSt        j                         ry| j	                  t
        j                        rt        j                  j                         ryy)NFT)
r   r_   r   ra   r[   r   	_amx_coder   cpu	_init_amxr   s    r   r_   zVecAMX.__bool__   sF    7! 0 01eii6I6I6Kr   )rd   re   rf   r   r   r+   r}   r   r,   r   r   rh   r   rm   rn   rl   r_   __classcell__)r   s   @r   r   r      sj    ''*MMK/ / )/Hhx}%7I* Y$  r   r   c                       e Zd ZU dZdgZesdndZej                  dej                  dej                  diZdefdZej                  Zeegef   ed	<   y
)VecAVX2r   CPU_CAPABILITY_AVX2z-mavx2 -mfma -mf16cz
/arch:AVX2rw   r   r	   c                      y)Navx2ro   r   s    r   r}   zVecAVX2.__str__   s    r   r,   Nr   ro   r   r   r   r      sd    J#$F%0l  QEMM2N  )/Hhx}%7r   r   c                       e Zd ZU dZg dZdZej                  dej                  dej                  diZ
defdZej                  Zeegef   ed<   y	)

VecZVECTORr   )CPU_CAPABILITY_ZVECTORzCPU_CAPABILITY=ZVECTORHAVE_ZVECTOR_CPU_DEFINITIONz-mvx -mzvectorrw   r   r	   c                      y)Nzvectorro   r   s    r   r}   zVecZVECTOR.__str__  s    r   r,   Nr   ro   r   r   r   r     s[    JF
 #KQEMM2N  )/Hhx}%7r   r   c                       e Zd ZU dZdgZdZej                  dej                  dej                  diZ
defdZej                  Zeegef   ed<   y	)
VecVSXr   CPU_CAPABILITY_VSXz-mvsxrw   r   r	   c                      y)Nvsxro   r   s    r   r}   zVecVSX.__str__      r   r,   Nr   ro   r   r   r   r     sZ    J"#FKQEMM2N  )/Hhx}%7r   r   c                   f    e Zd ZU dZdgZdZi ZdefdZde	fdZ
ej                  Zeegef   ed<   y)InvalidVecISAr   ru   r	   c                      y)NINVALID_VEC_ISAro   r   s    r   r}   zInvalidVecISA.__str__*  s     r   c                      y)NFro   r   s    r   r_   zInvalidVecISA.__bool__-  r   r   r,   N)rd   re   rf   r   r   r   r   r+   r}   rl   r_   r   r,   r   r   rh   ro   r   r   r   r   $  sL    JTFK! !$  )/Hhx}%7r   r   c                  z   g } dt         t           dt        dt        dd fd}t        j                         }	 |dk7  r|dk7  r| S t
        j                  j                         }t
        j                  j                         }t
        j                  j                         } || |d        || |d	        || |d
       | S )Ndestisa_supportedisa_namer	   c                 ,    |r| j                  |       y y r   )append)r   r   r   s      r   _check_and_append_supported_isaz8x86_isa_checker.<locals>._check_and_append_supported_isa6  s     KK! r   x86_64AMD64r   r   amx_tile)
ri   r+   rl   platformmachiner   r   _is_avx2_supported_is_avx512_supported_is_amx_tile_supported)supported_isar   Archr   r   r   s         r   x86_isa_checkerr   3  s    !M"3i"(,"8;"	" D xDGO99'')DYY++-Fyy//1H#M4@#M68D#M8ZHr   
capabilityvec_isa_listinvalid_vec_isac                     dddddd}| |j                         v r$||    }|dk(  r|S |D ]  }|t        |      v s|c S  | rt        j                  d|         |d   S )	Nr   r   r   r   r   )defaultr   r   r   r   z/ignoring invalid value for ATEN_CPU_CAPABILITY r   )keysr+   warningswarn)r   r   r   capability_to_isa_strisa_strr9   s         r   get_isa_from_cpu_capabilityr   R  s     % *//11'
3''""# 	G#g,&	 G
|TU?r   c                     g } t         j                  dk(  r0t        j                         dk(  r| j                  t	                      t         j                  dvr| S t        j
                         }|dk(  rt        d      5 }	 |j                         }|sn_t        j                  d|      }|rF|j                         D ]3  }t        j                  d|      s| j                  t                       n r	 d d d        | S |dk(  r| j                  t                      | S |d	k(  rat        j                  j                   j#                         d
k(  r| j                  t%                      | S | j                  t	                      | S |dv r)	 t'               | j)                  fdt*        D               | S # 1 sw Y   | S xY w)Ndarwinarm)linuxr   s390xz/proc/cpuinfoz^features\s*:\s*(.*)$z[\^ ]+vxe[\$ ]+ppc64leaarch64SVE256r   r   c              3   |   K   | ]3  }t        fd t        |      j                         D              r|r| 5 yw)c              3   &   K   | ]  }|v  
 y wr   ro   ).0flag_cpu_supported_x86_isas     r   	<genexpr>z/valid_vec_isa_list.<locals>.<genexpr>.<genexpr>  s     Od411Os   N)allr+   split)r   isar   s     r   r   z%valid_vec_isa_list.<locals>.<genexpr>  s5      
Oc#hnn>NOOTW 
s   9<)rL   r   	processorr   rq   r   openreadlinerematchgroupssearchr   r   r   backendsr   get_cpu_capabilityr   r   extendsupported_vec_isa_list)isa_listarch	_cpu_infolinefeaturesmatchgroupr   s         @r   valid_vec_isa_listr   r  s   H
||xH$6$6$8E$A	"
||--Dw/" 	"i ))+ ")A4 H !.!5!5!7 "99%7?$OOJL9!"  		"< O% 
	!" O! 
	>>002h>OOIK( O OOGI& O 
$	$	 "1!2 
-
 	
 O=	"< Os   ?AF3F33F=c                     t        j                         r t        j                         dv r
t	               S t               } | st        S t         j                  j                  $t        t        j                  d      | t              S | D ]1  }t         j                  j                  |j                         k(  s/|c S  t        S )Nr   ATEN_CPU_CAPABILITY)r   ra   r   r   r   r   r   r5   simdlenr   rC   getenvr!   )_valid_vec_isa_listr   s     r   pick_vec_isar     s    x//15HHy(:(< zz!*II+,.A?
 	
 # ::0J r   )$dataclassesrm   rC   r   r   rJ   rL   r   typingr   r   r   r   torch._inductorr   r   r+   r   r   	dataclassrq   r   r   r   r   r   r   r   ri   r   r   r   r   rn   r   r   ro   r   r   <module>r      s     	  	  
  ' '  " llg%  v2 v2r 8f 8 8 8 8 8& 8 8 8  $Y $ $N 8f 8 8 8 8 8  	8V 	8 	88F 8c 6  / (IKGIy{S c4i v, #@ T(DL ( (Vf r   