
# mypy: ignore-errors

"""
This module provides TVM backend integration for TorchDynamo.

Apache TVM is a deep learning compiler framework that can optimize and execute
models on various hardware backends. This module enables:

- Compilation of PyTorch models to TVM's computation graphs
- Multiple scheduling options:
  - Default scheduler
  - Auto-scheduler for automatic optimization
  - Meta-schedule for evolutionary search-based tuning
- Hardware-specific optimizations:
  - CUDA GPU support
  - CPU support with LLVM targeting and architecture-specific tuning
  - Automatic detection of CPU capabilities (AVX2, AVX512)
- Tensor conversion utilities between PyTorch and TVM formats
- Configurable optimization levels and tuning trials

The backend can be used with torch.compile():
    model = torch.compile(model, backend="tvm")
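
A scheduler and tuning budget can also be selected per call through the
options dict. As a sketch, the keys shown are the ones this backend reads
below (20000 trials and opt_level 3 are its defaults):

    model = torch.compile(
        model,
        backend="tvm",
        options={"scheduler": "meta_schedule", "trials": 20000, "opt_level": 3},
    )

When no scheduler is given, the TVM_SCHEDULER environment variable is
consulted; valid values are "default", "auto_scheduler" and "meta_schedule".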
    N)MappingProxyType)Optional   )device_from_inputsfake_tensor_unsupported)register_backend N     )	schedulertrials	opt_level)optionsr   c                    dd l  ddl m} ddlm} t        j
                  j                  | |      }t        |      }t        |      D cg c]  \  }}d| |j                  f }	}} | | }
t        |
      dk(  r!t        j                  d       | j                  S |j                  j                  ||	      \  }}|j                   dk(  r6 j#                  |j$                        } j&                  j#                         }n4 j)                  d      } j&                  j+                  t-                     }|j/                  dd       }| t0        j2                  j/                  dd       }|j/                  d	d
      }|j/                  dd      }|dk(  r&ddl m} t7        j8                         }t0        j:                  j=                  |      s|j?                  |d   ||      \  }}t        |      dk7  rn|jA                  ||      }t0        j:                  j=                  |      s=|dkD  sJ |jC                  ||jE                  |      gd      }	 |jG                  |       |jM                  |      5   jN                  jQ                  |ddi      5  |jS                  |||      }d d d        d d d        n|dk(  rddl m*} t7        jV                         5 }|j                   dk7  rB j&                  j+                  t-                d|jX                  j[                  d             }|dkD  sJ |j\                  j_                  ||||d|d|      }|j\                  ja                  |||||      }d d d        nL|dk(  s|s: jN                  jQ                  |       5  |jS                  |||      }d d d        ntc        d!      |je                   d   |            d"  fd#fd$}|S c c}}w # tH        $ r6 t0        j:                  j=                  |      rt1        jJ                  |        w xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)%Nr   )relay)graph_executorinp_z0Explicitly fall back to eager due to zero outputcudar   TVM_SCHEDULERr   r	   r   r
   auto_scheduler)r   maini  )num_measure_trialsmeasure_callbacksearly_stoppingz relay.backend.use_auto_schedulerT)r   config)targetparamsmeta_schedule)r   z --num-cores F)logical@   evolutionary)modr   work_dirmax_trials_globalnum_trials_per_iterr   strategyr   )databaser!   r   r   r   default)r   zThis tuning option is invalid/not implemented for torchdynamo's TVM-related backend. There are three available options: default, auto_scheduler and meta_schedule.c                     | j                   dk(  r#t        j                  | j                               S t        j                  j
                  j                  | j                               S )z8A helper function to transfer a NDArray to torch.tensor.bool)dtypetorch
from_numpynumpyutilsdlpackfrom_dlpack	to_dlpack)	nd_tensors    J/home/dcms/DCMS/lib/python3.12/site-packages/torch/_dynamo/backends/tvm.pyto_torch_tensorztvm.<locals>.to_torch_tensor   sL    ??f$ ##IOO$566{{!!--i.A.A.CDD    c                     | j                   t        j                  k(  r7j                  j	                  | j                         j                               S j                  j                  |       S )z8A helper function to transfer a torch.tensor to NDArray.)r*   r+   r)   ndarraycpur-   r0   )torch_tensortvms    r3   to_tvm_tensorztvm.<locals>.to_tvm_tensor   sQ    + 66<< 0 0 2 8 8 :;;vv!!,//r5   c                  X   | D cg c]  }|j                          }}j                         \  }}|j                         D ch c]  \  }}|	 }}}t        |d      D ]m  \  }}|j	                         dk7  s|j
                  r|j                         }d| }	|	|vrt        j                  d|	       Vj                  |	 |             o j                          t        j                               D 
cg c]  }
 j                  |
             c}
S c c}w c c}}w c c}
w )Nr   r   z6input %s skipped as not found in tvm's runtime library)
contiguousget_input_infoitems	enumeratedimrequires_graddetachlogwarning	set_inputrunrangeget_num_outputs
get_output)i_argsaargs
shape_info_nameactive_inputsidxarginp_nameimr4   r<   s              r3   exec_tvmztvm.<locals>.exec_tvm   s   (./1//((*
A-7-=-=-?@'$@@!$* 	HCwwyA~$$**,C!#<=0KKP  !#&	 	
:?@Q@Q@S:TUQQ0UU' 0@" Vs   DD!;D')3r;   r   tvm.contribr   r+   jittracer   rA   shapelenrE   rF   forwardfrontendfrom_pytorchtyper   indexr   r9   Targetllvm_targetgetosenvironr   tempfileNamedTemporaryFilepathexistsextract_tasksTaskSchedulerTuningOptionsRecordToFiletune	ExceptionunlinkApplyHistoryBest	transformPassContextbuildr   TemporaryDirectoryr.   	cpu_countrelay_integration
tune_relaycompile_relayNotImplementedErrorGraphModule)!gmexample_inputsr   r   r   jit_moddevicerS   rV   
shape_listexample_outputsr!   r   devr   r   r   r   r   log_filetaskstask_weightstunertune_optionlibmsr"   r&   rX   rW   r4   r<   r;   s!                                @@@@r3   r;   r;   +   s:    *iioob.1G/F8A.8QRfc1T#<)RJR.)O
?q FGzz..--gzBKC{{fhhv||$"ggaj"";=1K.IJJNN?D9	[[5)FK+I$$&..0ww~~h'"0">">FVV#E< 5zQ&44ULIww~~h/!A:%:"0">">+1+9+F+Fx+P*Q'+ #? #K


;/ ,,X6 	E**#-OQU,V +  E kk#fVkDE	E 	E
 
o	%+((* 	h{{f$ **"}o]2883E3Ee3E3T2UV
 A::++66!"($&'# 7 	H &&44!# 5 C)	 	6 
i	y]]&&&; 	A++c&+@C	A 	A "\
 	
 	"">3y>##67AE0V, OE SP % 77>>(3IIh/E E	E 	E	 	:	A 	AsO   O7O= 5 QP?*QBQ#Q$=?P<?Q		QQQ!$Q-r   )r   r   c                  N    	 t        j                  d       y# t        $ r Y yw xY w)Nr;   TF)	importlibimport_moduleImportError r5   r3   has_tvmr      s*    & s    	$$c                  p    t         j                  dk(  r#t        d      j                         } d| v ryd| v ryy)Nlinuxz/proc/cpuinfoavx512zllvm -mcpu=skylake-avx512avx2zllvm -mcpu=core-avx2llvm)sysplatformopenread)cpuinfos    r3   rd   rd      s:    
||w',,.w.w)r5   )__doc__	functoolsr   loggingrf   r   rh   typesr   typingr   r+   commonr   r   registryr   	getLogger__name__rE   r;   partialtvm_meta_scheduletvm_auto_schedulerr   	lru_cacherd   r   r5   r3   <module>r      s   ,    	 
  "   ? & g! 
 +;e!<+	P &'	P  Pf &I%%c_E &Y&&s6FG  T r5   
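

# Minimal usage sketch (commented out so the module stays side-effect free).
# It assumes a working TVM installation; the toy Linear model and input shape
# are hypothetical placeholders, not part of this backend's API:
#
#     import torch
#     from torch._dynamo.backends.tvm import has_tvm
#
#     if has_tvm():
#         model = torch.nn.Linear(8, 4).eval()
#         compiled = torch.compile(
#             model, backend="tvm", options={"scheduler": "default"}
#         )
#         y = compiled(torch.randn(2, 8))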