
    Vh#6                       U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZmZ d dlmZ d dlmZ d dlmZ erd d	lmZ d ad ad ag ad
ed<   g Zded<   d ad aej                   G d d             Z g a!ded<   d a"d a#d a$d a%d5dZ&e G d d             Z'd6dZ( G d d      Z)i Z*ded<   e G d d             Z+e+jY                  dg d       e+jY                  dg d       e+jY                  d g d!       e+jY                  d"g d#       e+jY                  d$g d%       d7d&Z-d8d'Z.d9d(Z/	 	 	 	 	 	 d:d)Z0d;d*Z1d8d+Z2d<d,Z3d=d-Z4	 	 	 	 	 	 d>d.Z5	 	 	 	 	 	 	 	 d?d/Z6d5d0Z7d@d1Z8e	dAd2       Z9dBd3Z:dCd4Z;y)D    )annotationsN)	dataclass)	lru_cache)CallablecastOptionalTYPE_CHECKINGUnion)config)get_benchmark_name)
OrderedSet)BaseSchedulerNodez#list[tuple[BaseSchedulerNode, int]]nodes_num_elemz%list[tuple[BaseSchedulerNode, float]]node_runtimesc                  &    e Zd ZU ded<   dZded<   y)CppOuterLoopFusedCountintinner_kernel_numberr   local_buffer_numberN)__name__
__module____qualname____annotations__r        G/home/dcms/DCMS/lib/python3.12/site-packages/torch/_inductor/metrics.pyr   r   (   s      r   r   zlist[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsc                     da dadat        j	                          t
        j	                          dadat        j	                          da	da
daday )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr   num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingparallel_reduction_countr   r   r   resetr)   ;   s^     %&"%++- !01- r   c                  N    e Zd ZU dZded<   ded<   ded<   ded<   ded<   ded<   y	)
CachedMetricsDeltasz]
    The subset of metrics we want update across cache hits, e.g., the
    FxGraphCache.
    r   r   r    r#   r$   r!   r&   N)r   r   r   __doc__r   r   r   r   r+   r+   U   s-    
  $''/22r   r+   c                 n    t        j                  t              D  cg c]  } | j                   c} S c c} w N)dataclassesfieldsr+   name)fields    r   get_metric_fieldsr3   d   s&    $/$6$67J$KL5EJJLLLs   2c                  2    e Zd ZdZddZddZedd       Zy)	CachedMetricsHelperz
    A helper class to help calculate and apply counter deltas for those
    metrics we want to save with cache entries (e.g., FxGraphCache) and
    apply on a cache hit.
    c                d    i | _         t               D ]  }t               |   | j                   |<    y r.   )cached_metricsr3   globals)selfmetrics     r   __init__zCachedMetricsHelper.__init__o   s3     ') 	<F*1)F*;D'	<r   c                z    i }t               D ]"  }t               |   | j                  |   z
  ||<   $ t        di |S )Nr   )r3   r8   r7   r+   )r9   delta_metricsr:   s      r   
get_deltaszCachedMetricsHelper.get_deltast   sL    ') 	TF$+If$58K8KF8S$SM&!	T #3]33r   c                `    t               D ]!  }t               |xx   t        | |      z  cc<   # y r.   )r3   r8   getattr)deltar:   s     r   apply_deltasz CachedMetricsHelper.apply_deltas{   s-    ') 	8FIf!77	8r   NreturnNone)rD   r+   )rA   r+   rD   rE   )r   r   r   r,   r;   r>   staticmethodrB   r   r   r   r5   r5   h   s%    <
4 8 8r   r5   zdict[str, MetricTable]REGISTERED_METRIC_TABLESc                  j    e Zd ZU ded<   ded<   dZded<   	 	 	 	 ddZdd	Zdd
ZddZe	dd       Z
y)MetricTablestr
table_name	list[str]column_namesr   r   num_rows_addedc                p   | j                   t               vry  |       }t        | j                        t        |      k(  s(J t        | j                         dt        |              t	        | j                        t	        |j                               k(  s6J t	        | j                         dt	        |j                                       t               }|g| j                  D cg c]  }||   	 c}z   }t        d |D              sJ | j                  t        t        t           |             y c c}w )Nz v.s. c              3  <   K   | ]  }t        |t                y wr.   )
isinstancerJ   ).0is     r   	<genexpr>z&MetricTable.add_row.<locals>.<genexpr>   s     3!:a%3s   )rK   enabled_metric_tableslenrM   r   keysr   all
_write_rowr   listrJ   )r9   row_fnrow_dictbncolumn_namerows         r   add_rowzMetricTable.add_row   s    ??"7"9984$$%X6 	
4$$%&fS]O<	
6 $++,
8==?0KK 	
$++,-VJx}}4O3PQ	
K  !dt?P?PQh{+QQ3s3333T#Y,- Rs   'D3c                "    d| j                    dS )Nmetric_table_z.csv)rK   )r9   s    r   output_filenamezMetricTable.output_filename   s    t/t44r   c                    | j                         }t        |d      5 }t        j                  |d      }|j	                  dg| j
                  z          d d d        y # 1 sw Y   y xY w)Nw
lineterminator
model_name)rc   opencsvwriterwriterowrM   )r9   filenamefdrl   s       r   write_headerzMetricTable.write_header   s^    '')(C  	@BZZ48FOO\NT->->>?	@ 	@ 	@s   7AA&c                   | j                         }| j                  dk(  r/t        j                  j	                  |      s| j                          | xj                  dz  c_        t        |      D ]&  \  }}t        |t              r|d}n|d}n|}|||<   ( t        |d      5 }t        j                  |d      }|j                  |       d d d        y # 1 sw Y   y xY w)Nr      z.6f arf   rg   )rc   rN   ospathexistsrp   	enumeraterQ   floatrj   rk   rl   rm   )r9   r_   rn   idxorig_valnew_valro   rl   s           r   rY   zMetricTable._write_row   s    '')!#BGGNN8,Dq &s^ 	MC(E*%cN!"CH	 (C  	!BZZ48FOOC 	! 	! 	!s   $)CCc                .    t        | |      }|t        | <   y r.   )rI   rG   )r1   rM   tables      r   register_tablezMetricTable.register_table   s    D,/). &r   N)r[   z4Callable[[], dict[str, Optional[Union[str, float]]]]rD   rE   )rD   rJ   rC   )r_   rL   rD   rE   )r1   rJ   rM   rL   rD   rE   )r   r   r   r   rN   r`   rc   rp   rY   rF   r   r   r   r   rI   rI      sP    ONC.J.	.(5@!( / /r   rI   slow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyfused_kernel_pathfused_kernel_latencyslow_down_ratiograph_stats)graph_idnum_nodes_before_fusionnum_nodes_after_fusionpersistent_red_perf)
kernel0_pathr   r   kernel3_pathkernel0_latencyr   r   kernel3_latency
size_hintsreduction_hint'fusion_failure_due_to_indexing_mismatch)pre_grad_graph_idpost_grad_graph_id
node1_name
node2_namenode1_debug_strnode2_debug_strcommon_buffer_namesfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryr   r   line_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbc                    ddl m} ddlm} |j	                  |       } ||      }t        j                  |j                  j                        S )z
    The kernel_module_code is the python module that contains kernel function code.
    kernel function is the proper triton kernel function annotated with
    @triton.jit
    rr   )PyCodeCache)get_triton_kernel)	codecacher   wrapper_benchmarkr   loadinspect	getsourcefn)kernel_module_coder   r   modkernels        r   _parse_kernel_fn_coder     s@     '4


-
.Cs#F VYY\\**r   c                4    t        | j                               S )zJ
    Return the line of code for the kernel excluding the decorators.
    )rV   
splitlines)proper_kernel_fn_codes    r   _parse_kernel_line_of_coder   &  s     $//122r   c                n    |dk(  ry t        j                  d|       }|sJ d       |j                  d      S )Nforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!rr   researchgroup)r   r   ms      r   _parse_size_hintsr   -  s;    )#
		/1CDA###1771:r   c                l    | dvry t        j                  d|      }|sJ d       |j                  d      S )N)	reductionpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!rr   r   )r   r   r   s      r   _parse_reduction_hintr   6  s>     CC
		9;MNA???1771:r   c                $    | j                  |      S r.   )count)r   patterns     r   _count_patternr   @  s     &&w//r   c                    | j                         d   }|j                  d      sJ |j                  d      }|j                  d      }||dz   | }|j                  d      }t	        |      S )Nr   def (z):rr   ,)r   
startswithindexsplitrV   )r   def_line	start_idxend_idxdecl_csvcompss         r   _count_argsr   D  sm    $//1!4Hv&&&s#InnT"G	A0HNN3Eu:r   c                .    | j                  d      }| |d S )z
    Skip decorators.
    r   N)r   )kernel_fn_code	start_poss     r   _parse_proper_kernel_fn_coder   N  s!     $$V,I)*%%r   c                n    t        j                  | d|       }|rt        |j                  d            S y )Nz
 = ([\d]+)rr   )r   r   r   r   )r   numel_arg_namer   s      r   _parse_numelr   V  s3    
		^$K02GHA1771:r   c                j    t        j                  d|       }|rt        |j                  d            S 	 y)z
    inductor meta looks like:
        inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
    z.kernel_num_gb.:\s*([0-9.]+)rr   N)r   r   ry   r   )r   r   r   s      r   _parse_kernel_args_num_gbr   ^  s5     			1>BAQWWQZ  	 r   c           
         	 ddl m}  ||      t        |      t        |      	t	        |      t              t              t        d      j                   	fd       y)z
    An utility to log kernel metadata. We may parse metadata from kernel source code here.

    It's fine to parse the generated kernel code here since the logging is
    disabled by default. It would hurt compilation time.
    rr   )"get_kernel_category_by_source_coder   c                      t        d      t        d      t        d      t        d      t              t        d      t        d      t        d      t               dS )	Nztl.loadztl.storezfor ztl.atomic_addr   r   r   r   )r   r   r   r   )r   r   kernel_line_of_coder   r   r   r   r   s   r   <lambda>z%log_kernel_metadata.<locals>.<lambda>  s    &&.$,/&'<iH'(=zJ*+@&I,-BOT#$9:"#8(C"#8(C"#8(C";#
 r   N)	r   r   r   r   r   r   r   get_metric_tabler`   )
r   r   r   r   r   r   r   r   r   r   s
   ``  @@@@@@r   log_kernel_metadatar   s  sp     F89KLO*?<NON"#5GJ*+=>N8H 55JK&'//	
 	
r   c                     t         j                         D ]f  \  } }| t               v s|j                         }t        j
                  j                  |      rt	        j                  |       |j                          h y)z
    Purge the old log file at the beginning when the benchmark script runs.
    Should do it in the parent process rather than the child processes running
    each individual model.
    N)	rG   itemsrU   rc   ru   rv   rw   unlinkrp   )r1   r~   rn   s      r   purge_old_log_filesr     sb     0557 !e(**,,.Hww~~h'		(# !r   c                 4    t        t        j                        S r.   )enabled_metric_tables_implr   rU   r   r   r   rU   rU     s    %f&B&BCCr   c                    t        t                  }| j                  d      D ]9  }|j                         }|s|t        v sJ d| d       |j                  |       ; |S )Nr   zMetric table name z is not registered)r   rJ   r   striprG   add)
config_strenabledr1   s      r   r   r     sq    oG  % zz|// 	
 &89	
/ 	D Nr   c                    | t               v S r.   )rU   r1   s    r   is_metric_table_enabledr     s    (***r   c                :    | t         v sJ d|  d       t         |    S )NzMetric table z is not defined)rG   r   s    r   r   r     s*    ++R}TF/-RR+#D))r   rC   )rD   rL   )r   rJ   rD   rJ   )r   rJ   rD   r   )r   rJ   r   rJ   rD   Optional[str])r   rJ   r   rJ   rD   r   )r   rJ   r   rJ   rD   r   )r   rJ   rD   rJ   )r   rJ   r   rJ   rD   zOptional[int])r   rJ   r   rJ   rD   zOptional[float])r   rJ   r   rJ   r   rJ   rD   rE   )rD   OrderedSet[str])r   rJ   rD   r   )r1   rJ   rD   bool)r1   rJ   rD   rI   )<
__future__r   rk   r/   r   ru   r   r   	functoolsr   typingr   r   r   r	   r
   torch._inductorr   torch._inductor.utilsr   torch.utils._ordered_setr   torch._inductor.schedulerr   r   r    r!   r   r   r   r#   r$   r   r   r%   r&   r'   r(   r)   r+   r3   r5   rG   rI   r   r   r   r   r   r   r   r   r   r   r   r   rU   r   r   r   r   r   r   <module>r     s<   " 
   	 	 !  A A " 4 / ;  !"       8:4 9     ! ! ! CE !#? D ,- )   !4 3 3 3M8 82 46 0 5 :/ :/ :/z         "   -	   6+ 3.10&*-*))#&)<?)	)X!D 
 
+*r   