
    VhO                     h   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlZd dlmZ g dZ G d d	e      Zd
 Zd Zd Zd Z G d d      Z G d d      Z edg d      Z G d de      Z G d de      Z G d de      Z G d d      Zd ZdZdZ d Z!d"d Z"	 	 	 	 	 	 	 	 	 d#d!Z#y)$    N)defaultdict
namedtuple)
attrgetter)AnyOptional)
deprecated)
DeviceType)	EventListFormattedTimesMixinIntervalKernelFunctionEventFunctionEventAvgStringTableMemRecordsAccc                        e Zd ZdZ fdZd Zd Zd Zd Zd Z	e
d        Z	 	 	 	 	 	 	 dd	Zd
 Zd ZdedefdZ	 	 	 ddZd Z xZS )r
   z'A list of Events (for pretty printing).c                     |j                  dd       }|j                  dd      }|j                  dd      }t        |   |i | || _        || _        d| _        || _        y )N
use_deviceprofile_memoryF
with_flops)popsuper__init___use_device_profile_memory_tree_built_with_flops)selfargskwargsr   r   r   	__class__s         L/home/dcms/DCMS/lib/python3.12/site-packages/torch/autograd/profiler_util.pyr   zEventList.__init__   sf    ZZd3
$4e<ZZe4
$)&)%- %    c                 r    | j                          | j                          | j                          d| _        y )NT)_populate_cpu_children_remove_dup_nodes_set_backward_stacktracesr   r   s    r"   _build_treezEventList._build_tree'   s.    ##% &&(r#   c                 "    | j                         S N)tabler(   s    r"   __str__zEventList.__str__-   s    zz|r#   c                    	 t               }t        t        |             D ]  }| |   j                  | |   j                  j                  | |   j                  k(  s=t        | |   j                  j
                        dk(  sc| |   j
                  | |   j                  _        | |   j                  | |   j                  _        | |   j
                  D ]  }| |   j                  |_         |j                  |        t        |      dk(  ry t        |       D cg c]  \  }}||vs| }}}| j                          | j                  |       Rc c}}w )N   r   )setrangelen
cpu_parentnamecpu_childrenkernelsadd	enumerateclearextend)r   	to_deleteidxchindevnew_evtss          r"   r&   zEventList._remove_dup_nodes0   s2   ISY' 
'I((4S	,,11T#Y^^CDI00==>!C8<S	8N8NDI((53793D3DDI((0"3i44 =(,S	(<(<=MM#&
' 9~"*3D/RwsBS	=QRHRJJLKK!#  Ss   E*Ec                    | D cg c]-  }|j                   s|j                  t        j                  k(  r|/ }}t	        |t        d            }t        j                  |d       }|D ]  \  }}t	        |d       }g }|D ]  }	t        |      dkD  r|d   }
|	j                  j                  |
j                  j                  k\  s-|	j                  j                  |
j                  j                  kD  r|j                          nC|
j                  |	       |	j                  J d|	j                          |	j!                  |
       nt        |      dkD  r|j#                  |	         yc c}w )	a4  Populate child events into each underlying FunctionEvent object.

        One event is a child of another if [s1, e1) is inside [s2, e2). Where
        s1 and e1 would be start and end of the child event's interval. And
        s2 and e2 start and end of the parent event's interval

        Example: In event list [[0, 10], [1, 3], [3, 4]] would have make [0, 10]
        be a parent of two other intervals.

        If for any reason two intervals intersect only partially, this function
        will not record a parent child relationship between then.
        thread)keyc                 2    | j                   | j                  fS r+   )rB   node_idevents    r"   <lambda>z2EventList._populate_cpu_children.<locals>.<lambda>a   s    u||U]]&C r#   c                 \    | j                   j                  | j                   j                   gS r+   )
time_rangestartendrF   s    r"   rH   z2EventList._populate_cpu_children.<locals>.<lambda>s   s&    5#3#3#9#9E<L<L<P<P;P"Q r#   r   Nz(There is already a CPU parent event for )is_asyncdevice_typer	   CPUsortedr   	itertoolsgroupbyr2   rJ   rK   rL   r   append_cpu_childr3   rC   set_cpu_parentappend)r   evtsync_eventseventsthreads
_thread_idthread_eventsthread_events_current_eventsrG   parents              r"   r%   z EventList._populate_cpu_childrenD   ss   $ 
<<COOz~~$E 
 

 8$
 ##C
  *1 	-%J#QN 35N' -.)A-+B/F((..&2C2C2G2GG ++//&2C2C2G2GG '**,//6!,,4REeii[QR4,,V4 .)A-  %%e,#-	-9
s   2E*c                 B   fdi }| D ]D  } |      |j                   |j                  |j                  f}||vs6|j                   ||<   F | D ]I  } |      }||j                  J |j                  |j                  f}||v r||   |_         Cg |_         K y )Nc                 P    | y | j                   dk(  r| S  | j                        S Nr/   )scoper3   )rW   	bw_parents    r"   rd   z6EventList._set_backward_stacktraces.<locals>.bw_parent   s*    {a
 00r#   )stacksequence_nrrB   
fwd_thread)r   
fwd_stacksrW   tprd   s        @r"   r'   z#EventList._set_backward_stacktraces   s    	1 
 	.C~%#))*?__cjj1J&$'IIJqM		.  	#C#A}||///]]ALL1
? *1CI "CI	#r#   c                 &    t        d | D              S )Nc              3   4   K   | ]  }|j                     y wr+   )self_cpu_time_total.0rG   s     r"   	<genexpr>z0EventList.self_cpu_time_total.<locals>.<genexpr>   s     ?5,,?   )sumr(   s    r"   rm   zEventList.self_cpu_time_total   s    ?$???r#   c                 T    t        | ||||||| j                  | j                  |
      S )a(  Print an EventList as a nicely formatted table.

        Args:
            sort_by (str, optional): Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``xpu_time``,
                ``cpu_time_total``, ``cuda_time_total``, ``xpu_time_total``,
                ``cpu_memory_usage``, ``cuda_memory_usage``, ``xpu_memory_usage``,
                ``self_cpu_memory_usage``, ``self_cuda_memory_usage``,
                ``self_xpu_memory_usage``, ``count``.
            top_level_events_only(bool, optional): Boolean flag to determine the
                selection of events to display. If true, the profiler will only
                display events at top level like top-level invocation of python
                `lstm`, python `add` or other functions, nested events like low-level
                cpu/cuda/xpu ops events are omitted for profiler result readability.

        Returns:
            A string containing the table.
        )	sort_by	row_limitmax_src_column_widthmax_name_column_widthmax_shapes_column_widthheaderr   r   top_level_events_only)_build_tabler   r   )r   rt   ru   rv   rw   rx   ry   rz   s           r"   r,   zEventList.table   s;    : !5"7$;//''"7
 	
r#   c                 ^   ddl }| j                  sdn| j                  }t        |d      5 }d}|j                  d       | D ]  }|j                  |j                  dj                  |j                  |j                  j                  |j                  j                         |j                  s|j                  nd|j                   d|j                   d	             |j                  D ]P  }|j                  d
|j                   d|j                  j                   d|j                   d| d| d       |dz  }R  t        |       dkD  r=|j                  |j                         dz
  |j                          |j#                          |j                  d       ddd       y# 1 sw Y   yxY w)zExport an EventList as a Chrome tracing tools file.

        The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

        Args:
            path (str): Path where the trace will be written.
        r   Ncudaw[zc{{"name": "{}", "ph": "X", "ts": {}, "dur": {}, "tid": {}, "pid": "CPU functions", "args": {{}}}}, z
" node_id:z, thread_id:z "z
{"name": "z", "ph": "s", "ts": z	, "tid": z , "pid": "CPU functions", "id": z, "cat": "cpu_to_z", "args": {}}, r/      ])osr   openwrite
trace_nameformatrJ   rK   
elapsed_us	is_remoterB   rE   r6   r2   seektellSEEK_SETtruncate)r   pathr   device_namefnext_idrW   _s           r"   export_chrome_tracezEventList.export_chrome_trace   s    	$($4$4f$:J:J$_ +	G GGCL !!>>)' (.v,,113"}} 

)#++l3::,bQ(   !A GG%cnn%5 6!!$!5!5 6 7""%** .!!(	 ***5 7((	 qLG!'!!D 4y1}qvvx!|R[[1

GGCLW+	 +	 +	s   E/F##F,c                 
    g dS )N)rm   self_cuda_time_totalself_xpu_time_totalself_privateuse1_time_total r(   s    r"   supported_export_stacks_metricsz)EventList.supported_export_stacks_metrics	  s    
 	
r#   r   metricc           	         || j                         vr%t        dt        | j                               z         t        j                  dd      }t	        |d      5 }| D ]  }|j
                  st        |j
                        dkD  s)t        ||j                  dd      j                  dd      j                  d	d            }t        |      dkD  std
}t        |j
                        D ]  }||j                  |      z  }|dz  } |d d dz   t        t        |            z   }|j                  |dz           	 d d d        y # 1 sw Y   y xY w)Nzmetric should be one of: z ;	
____r~   r   r}   devicexpuprivateuse1 ;rM    
)r   
ValueErrorstr	maketransr   re   r2   getattrreplaceintreversed	translater   )	r   r   r   translate_tabler   rW   metric_value	stack_strentrys	            r"   export_stackszEventList.export_stacks  s>   ==??+d::<=>  --&9$_ 	2 299SYY!!3#*vx8 1 9	$L <(1,$&	%-cii%8 -E%)III%,I- %.crNS$83s<?P;Q$Q		D 012	2 	2 	2s    E-EA
EA(EEc                    | j                   sJ t        t              }dt        t        df   ffd}| D ]  | ||||         j                         ! t        |j                         | j                  | j                  | j                        }|D ])  j                  d| _        |sd_        |r#d_        + |S )a  Averages all function events over their keys.

        Args:
            group_by_input_shapes: group entries by
                (event name, input shapes) rather than just event name.
                This is useful to see which input shapes contribute to the runtime
                the most and may help with size-specific optimizations or
                choosing the best candidates for quantization (aka fitting a roof line)

            group_by_stack_n: group by top n stack trace entries

            group_by_overload_name: Differentiate operators by their overload name e.g. aten::add.Tensor
            and aten::add.out will be aggregated separately

        Returns:
            An EventList containing FunctionEventAvg objects.
        return.c                    t        | j                        t        | j                        t        | j                        t        | j                        t        | j
                        g}|r|j                  j                         |r$|j                  t        | j                               |dkD  r|| j                  d | z  }t        |      S Nr   )r   rC   rE   rO   	is_legacyis_user_annotationrV   overload_nameinput_shapesre   tuple)rG   group_by_input_shapesgroup_by_stack_ngroup_by_overload_namerC   rW   s        r"   get_keyz'EventList.key_averages.<locals>.get_keyC  s     EIIEMM"E%%&EOO$E,,-C &

3,,-$

3u1123!#u{{#4$455:r#   r   r   r   Nr   )r   r   r   r   r   r7   r
   valuesr   r   r   re   r   r   )r   r   r   r   statsr   avg_listrW   s          @r"   key_averageszEventList.key_averages)  s    . 9DEU9V	38_	$  	C.0@BX c#h	 LLN''//''	
  	'C		"3#34CI(#% )$&!	' r#   c                 N    t               }| D ]  }||z  }d|_         d|_        |S )zVAverages all events.

        Returns:
            A FunctionEventAvg object.
        NTotal)r   rC   )r   
total_statrW   s      r"   total_averagezEventList.total_averagej  s;     &'
 	"C#J!JN	" !
r#   )Nd   K   7   P   NF)Fr   F)__name__
__module____qualname____doc__r   r)   r-   r&   r%   r'   propertyrm   r,   r   r   r   r   r   r   __classcell__)r!   s   @r"   r
   r
      s    1& "(C-J#4 @ @
   "#(
T6p
2# 2s 24 $$	?Br#   r
   c                 N    d}d}| |k\  r	| |z  ddS | |k\  r	| |z  ddS | ddS )+Define how to format time in FunctionEvent.g    .Ag     @@z.3fsmsusr   )time_usUS_IN_SECONDUS_IN_MSs      r"   _format_timer   x  sU    "LH,L(-Q//(H$S),,c]"r#   c                 D    |dk(  r| dk(  s
J d|         y| dz  |z  ddS )r   r   zExpected time_us == 0 but got NaNg      Y@.2f%r   )r   total_time_uss     r"   _format_time_sharer     s?    !|G=gYGG|o-c2!44r#   c                     d}d|z  }d|z  }t        |       |k\  r| dz  |z  ddS t        |       |k\  r| dz  |z  ddS t        |       |k\  r| dz  |z  ddS t        |       dz   S )z&Return a formatted memory size string.i         ?r   z Gbz Mbz Kbz b)absr   )nbytesKBMBGBs       r"   _format_memoryr     s    	B	B	B
6{b3,#C(,,	V	3,#C(,,	V	3,#C(,,6{T!!r#   c                       t         fd      S )Nc                 .    t        t        |             S r+   )r   r   )r   r4   s    r"   rH   z!_attr_formatter.<locals>.<lambda>  s    gdD.A!B r#   )r   r4   s   `r"   _attr_formatterr     s    BCCr#   c                       e Zd ZdZ ed      Z ed      Z ed      Z ed      Z ed      Z	 ed      Z
ed        Zed	        Ze ed
e      d               Zy)r   z{Helpers for FunctionEvent and FunctionEventAvg.

    The subclass should define `*_time_total` and `count` attributes.
    cpu_timedevice_timecpu_time_totaldevice_time_totalrm   self_device_time_totalc                 \    | j                   dk(  rdS d| j                  z  | j                   z  S Nr   g        r   )countr   r(   s    r"   r   zFormattedTimesMixin.cpu_time  s+    jjAosQ31D1D+Dtzz+QQr#   c                 \    | j                   dk(  rdS d| j                  z  | j                   z  S r   )r   r   r(   s    r"   r   zFormattedTimesMixin.device_time  s+    jjAosT31G1G+G$**+TTr#   z<`cuda_time` is deprecated, please use `device_time` instead.categoryc                     | j                   S r+   )r   r(   s    r"   	cuda_timezFormattedTimesMixin.cuda_time  s     r#   N)r   r   r   r   r   cpu_time_strdevice_time_strcpu_time_total_strdevice_time_total_strself_cpu_time_total_strself_device_time_total_strr   r   r   r   FutureWarningr   r   r#   r"   r   r     s    
 #:.L%m4O()9:+,?@-.CD!01I!JR R U U F 	 
 r#   r   c                       e Zd Zd Zd Zy)r   c                      || _         || _        y r+   )rK   rL   )r   rK   rL   s      r"   r   zInterval.__init__  s    
r#   c                 4    | j                   | j                  z
  S )z4
        Returns the length of the interval
        )rL   rK   r(   s    r"   r   zInterval.elapsed_us  s     xx$**$$r#   N)r   r   r   r   r   r   r#   r"   r   r     s    %r#   r   r   )r4   r   durationc                   \   e Zd ZdZddddddddddddej
                  ddddddddfdZd Zd Zd	 Z	e
d
        Ze
d        Ze
 ede      d               Ze
d        Ze
d        Ze
d        Ze
 ede      d               Ze
d        Ze
 ede      d               Ze
d        Zd Zy)r   z.Profiling information about a single function.Nr   FrM   c                    || _         || _        || _        || _        || _        t        ||      | _        || _        || _        g | _	        d| _
        g | _        d | _        || _        || _        || _        |	| _        |
| _        || _        || _        || _        || _        || _        || _        || _        || _        ||n|| _        || _        || _        || _        d| _        d| _        d| _         y )Nr/   rM   )!idrE   r4   r   r   r   rJ   rB   rg   r6   r   r5   r3   r   concrete_inputskwinputsre   rc   r   cpu_memory_usagedevice_memory_usagerN   r   rf   rO   device_indexdevice_resource_idr   flopsr   self_cpu_percenttotal_cpu_percenttotal_device_percent)r   r  r4   rB   start_usend_usr   rg   r   re   rc   r   r	  r
  rN   r   rf   rE   rO   r  r  r   r  r   r  r  r   s                              r"   r   zFunctionEvent.__init__  s   : #	"/)$,Xv$>!)3%'
1337-9*9(0 

)3%5(; &( +'2!-(0F6H 	  )$)
2D "!#$&!r#   c                     | j                   t        j                  k(  sJ | j                  j	                  t        |||             y r+   )rO   r	   rP   r6   rV   r   )r   r4   r   r  s       r"   append_kernelzFunctionEvent.append_kernel  s5    :>>111F4:;r#   c                     | j                   t        j                  k(  sJ t        |t              sJ |j                   t        j                  k(  sJ | j
                  j                  |       y)zAppend a CPU child of type FunctionEvent.

        One is supposed to append only direct children to the event to have
        correct self cpu time being reported.
        N)rO   r	   rP   
isinstancer   r5   rV   )r   childs     r"   rT   zFunctionEvent.append_cpu_child  sX     :>>111%///  JNN222  'r#   c                     | j                   t        j                  k(  sJ t        |t              sJ |j                   t        j                  k(  sJ || _        y)a$  Set the immediate CPU parent of type FunctionEvent.

        One profiling FunctionEvent should have only one CPU parent such that
        the child's range interval is completely inside the parent's. We use
        this connection to determine the event is from top-level op or not.
        N)rO   r	   rP   r  r   r3   )r   r_   s     r"   rU   zFunctionEvent.set_cpu_parent  sK     :>>111&-000!!Z^^333 r#   c                     | j                   s| j                  t        j                  k7  ry| j                  t        d | j                  D              z
  S )Nr   c              3   4   K   | ]  }|j                     y wr+   )r	  ro   r  s     r"   rp   z6FunctionEvent.self_cpu_memory_usage.<locals>.<genexpr>/  s      +
',E""+
rq   )rN   rO   r	   rP   r	  rr   r5   r(   s    r"   self_cpu_memory_usagez#FunctionEvent.self_cpu_memory_usage+  sJ    ==D,,
>$$s +
040A0A+
 (
 
 	
r#   c                     | j                   s| j                  t        j                  k7  ry| j                  t        d | j                  D              z
  S )Nr   c              3   4   K   | ]  }|j                     y wr+   )r
  r  s     r"   rp   z9FunctionEvent.self_device_memory_usage.<locals>.<genexpr>7  s      .
*/E%%.
rq   )rN   rO   r	   rP   r
  rr   r5   r(   s    r"   self_device_memory_usagez&FunctionEvent.self_device_memory_usage3  sJ    ==D,,
>''# .
373D3D.
 +
 
 	
r#   zO`self_cuda_memory_usage` is deprecated. Use `self_device_memory_usage` instead.r   c                     | j                   S r+   r  r(   s    r"   self_cuda_memory_usagez$FunctionEvent.self_cuda_memory_usage;  s     ,,,r#   c                 r    | j                   t        j                  k(  r| j                  j	                         S yr   )rO   r	   rP   rJ   r   r(   s    r"   r   zFunctionEvent.cpu_time_totalC  s*    z~~-??--//r#   c                     | j                   s| j                  t        j                  k7  ry| j                  t        d | j                  D              z
  S )Nr   c              3   4   K   | ]  }|j                     y wr+   )r   r  s     r"   rp   z4FunctionEvent.self_cpu_time_total.<locals>.<genexpr>N  s      )
%*E  )
rq   )rN   rO   r	   rP   r   rr   r5   r(   s    r"   rm   z!FunctionEvent.self_cpu_time_totalJ  sJ    ==D,,
>""S )
.2.?.?)
 &
 
 	
r#   c                    | j                   s| j                  sy| j                  t        j                  k(  ra| j
                  s9t        d | j                  D              t        d | j                  D              z   S t        d | j                  D              S | j                  t        j                  t        j                  t        j                  fv sJ | j                  j                         S )Nr   c              3   4   K   | ]  }|j                     y wr+   r  ro   kinfos     r"   rp   z2FunctionEvent.device_time_total.<locals>.<genexpr>Y       De5>>Drq   c              3   4   K   | ]  }|j                     y wr+   r   )ro   r=   s     r"   rp   z2FunctionEvent.device_time_total.<locals>.<genexpr>Y  s      K-/B((Krq   c              3   4   K   | ]  }|j                     y wr+   r(  r)  s     r"   rp   z2FunctionEvent.device_time_total.<locals>.<genexpr>^  r+  rq   )rN   r   rO   r	   rP   r   rr   r6   r5   CUDAPrivateUse1MTIArJ   r   r(   s    r"   r   zFunctionEvent.device_time_totalR  s    ==z~~->>Dt||DDs K373D3DK H  
 Dt||DDD##&&(   
 ??--//r#   zA`cuda_time_total` is deprecated. Use `device_time_total` instead.c                     | j                   S r+   r-  r(   s    r"   cuda_time_totalzFunctionEvent.cuda_time_totalg  s     %%%r#   c                 R   | j                   s| j                  sy| j                  t        j                  k(  r)| j
                  t        d | j                  D              z
  S | j                  t        j                  t        j                  t        j                  fv sJ | j
                  S )Nr   c              3   4   K   | ]  }|j                     y wr+   r-  r  s     r"   rp   z7FunctionEvent.self_device_time_total.<locals>.<genexpr>t  s      0,1''0rq   )rN   r   rO   r	   rP   r   rr   r5   r/  r0  r1  r(   s    r"   r   z$FunctionEvent.self_device_time_totalo  s    ==z~~-))C 0595F5F0 -   ##&&(   
 )))r#   zK`self_cuda_time_total` is deprecated. Use `self_device_time_total` instead.c                     | j                   S r+   r   r(   s    r"   r   z"FunctionEvent.self_cuda_time_total  s     ***r#   c                     | j                   S r+   r   r(   s    r"   rC   zFunctionEvent.key  s    yyr#   c           	         | j                   }| j                  }| j                  }dj                  g d| j                   d| j
                   d| j                   d| j                   d| j                   d| j                   d| j                  j                   d	| j                  j                   d
t        | j                  D cg c]  }|j                   c}       d| d| d| j
                   d| j                   dt        | j                          d| j"                   d| d| d| j$                   d| j&                   d| j(                   d| j*                   d      S c c}w )Nr   z<FunctionEvent id=z name=z overload_name=z device_type=z	 node_id=
 cpu_time=z
 start_us=z end_us=z cpu_children=r   _time=z thread= input_shapes= cpu_memory_usage=_memory_usage=z
 is_async=z is_remote=z seq_nr=z is_legacy=>)r   r   r
  joinr  r4   r   rO   rE   r   rJ   rK   rL   r   r5   rB   r   r	  rN   r   rf   r   )r   r   r   r
  r  s        r"   __repr__zFunctionEvent.__repr__  s   oo**"66y y  y	 y y		{ y/ y$J\J\I] y ^ y++,y,5y6:ll^yCMyNRN_N_M`yay--.y.6y7;7J7J6KyLy  t7H7H Ie IJKy LMy NYMy Z`y al_lymy II;	y '	y (,{{m	y 4B	y CFdFWFWBXAY	yZ 	y
 !% 5 56y
 78y
 9D}y
 ESy
 TgRgy
hy y '2y 37..1Ay BJy KOJZJZI[y \gy hlgugufvy wxy	
 !Js   F	)r   r   r   r   r	   rP   r   r  rT   rU   r   r  r  r   r   r"  r   rm   r   r3  r   r   rC   rA  r   r#   r"   r   r     se   8 NN 7>'@<	(
! 
 
 
 
 Y-	 
-   
 
 0 0( K&	 
& * * U+	 
+  
r#   r   c                   *    e Zd ZdZddZd Zd Zd Zy)r   z:Used to average stats over multiple FunctionEvent objects.Nc                 b   d | _         d| _        d| _        d| _        d| _        d | _        d| _        d| _        d| _        d| _	        d | _
        d | _        d | _        d | _        d| _        d| _        d| _        d| _        d | _        d | _        t(        j*                  | _        d| _        d| _        y )Nr   F)rC   r   rE   rN   r   r   r   r   rm   r   r   r   re   rc   r	  r
  r  r  r5   r3   r	   rP   rO   r   r  r(   s    r"   r   zFunctionEventAvg.__init__  s    "&
#$)-#$&'() +,#7;,0%)
$(
%&() *+"-.%;?37'1~~$
r#   c                 "   | j                   |j                   | _         |j                  | _        |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        |j                  | _        |j                  | _	        |j                  | _
        |j                  | _        |j                  | _        |j                  | _        t        |t        t         f      sJ |j                   | j                   k(  sJ | xj"                  |j"                  z  c_        | xj$                  |j$                  z  c_        | xj&                  |j&                  z  c_        | xj(                  |j(                  z  c_        | xj*                  |j*                  z  c_        | xj,                  |j,                  z  c_        | xj.                  |j.                  z  c_        | xj0                  |j0                  z  c_        | xj2                  |j2                  z  c_        | j4                  |j4                  | _        | S |j4                  | xj4                  |j4                  z  c_        | S r+   )rC   rE   rN   r   r3   r5   r   r   re   rc   rO   r   r   r   r  r   r   r   r   rm   r   r	  r
  r  r  r   r  r   others     r"   r7   zFunctionEventAvg.add  s   88 yyDH ==DL!NNDM"__DN#..DO % 2 2D!&!4!4D % 2 2DDJDJ$00D"__DN#..DO&+&>&>D#%-1A!BCCCyyDHH$$$u333%"9"99  E$=$== ##u'C'CC#!7!77  E$=$== ""e&A&AA"%%)G)GG%

ekk!
::DJ  [[$JJ%++%Jr#   c                 $    | j                  |      S r+   )r7   rE  s     r"   __iadd__zFunctionEventAvg.__iadd__  s    xxr#   c                 <   | j                   sdn| j                   }| j                  }| j                  }| j                  }d| j                   d| j
                   d| j                   d| d| d| d| dt        | j                         d	| j                   d| d
| dS )Nr}   z<FunctionEventAvg key=z self_cpu_time=r:  z  self_r;  r   r<  r=  r>  r?  )
r   r   r   r
  rC   r   r   r   r   r	  )r   r   self_device_timer   device_memorys        r"   rA  zFunctionEventAvg.__repr__  s    $(OOf::**00$TXXJod>Z>Z=[[efjfwfwex y M(8'9;-vk]Zhilmqm~m~i  iA A  $ 5 56a}NS`Raabd	
r#   )r   N)r   r   r   r   r   r7   rH  rA  r   r#   r"   r   r     s    D2$L	
r#   r   c                       e Zd Zd Zy)r   c                 p    t        |      dkD  rt        j                  j                  |      n|| |<   | |   S rb   )r2   torch_C	_demangle)r   rC   s     r"   __missing__zStringTable.__missing__  s2     033x!|EHH&&s+S	Cyr#   N)r   r   r   rQ  r   r#   r"   r   r     s    r#   r   c                       e Zd ZdZd Zd Zy)r   z=Acceleration structure for accessing mem_records in interval.c                     || _         g | _        g | _        t        |      dkD  rPt	        t        |      D cg c]  \  }}|d   j                         |f c}}      }t        | \  | _        | _        y y c c}}w r   )_mem_records_start_nses_indicesr2   rQ   r8   start_nszip)r   mem_recordsirtmps        r"   r   zMemRecordsAcc.__init__  sn    '&(#%{a9[;QR41a1Q4==?A.RSC.13i+Ddm  Rs   A5
c              #      K   t        j                  | j                  |dz        }t        j                  | j                  |dz        }t	        ||      D ]   }| j
                  | j                  |       " yw)z
        Return all records in the given interval
        To maintain backward compatibility, convert us to ns in function
        i  N)bisectbisect_leftrU  bisect_rightr1   rT  rV  )r   r  r  	start_idxend_idxrZ  s         r"   in_intervalzMemRecordsAcc.in_interval  sp     
 &&t'7'7DI	%%d&6&6Fy'* 	6A##DMM!$455	6s   A7A9N)r   r   r   r   r   rc  r   r#   r"   r   r     s    G86r#   r   c                 4     g d}t         fd|D              S )N))autograd/__init___make_grads)re  backward)ztorch/tensorrg  )_internal/common_utilsprof_callable)rh  prof_func_call)rh  prof_meth_callc              3   @   K   | ]  }|d    v xr |d   v    yw)r   r/   Nr   )ro   r   r   s     r"   rp   z&_filter_stack_entry.<locals>.<genexpr>  s)     OAaDEM3adem4Os   )all)r   filtered_entriess   ` r"   _filter_stack_entryro  
  s     O>NOOOr#   z[memory]z[OutOfMemory]c                 .    t         t        ddddddg}| |v S )Nz profiler::_record_function_enterz$profiler::_record_function_enter_newzprofiler::_record_function_exitzaten::is_leafzaten::output_nrzaten::_version)MEMORY_EVENT_NAMEOUT_OF_MEMORY_EVENT_NAME)r4   filtered_out_namess     r"   _filter_namert    s2     	 *.)	 %%%r#   c                 N    t               }||    } |r| j                  d      rd} | S )NzProfilerStep#zProfilerStep*)r   
startswith)r4   with_wildcardstring_tables      r"   _rewrite_namery  -  s,    =LD???+"DKr#   c
                 l  ./012 t        |       dk(  ryt        d | D              }
t        d | D              }| d   j                  }|s|
rt        d      t        d | D              }t        d | D              }t	        t        | fd	d
      |||      } t        d | D              dz   }|t        ||      }t        d | D              dz   }|t        ||      }d}|}d}| D cg c]4  }|j                  t        |j                        dkD  s)|j                  6 }}t        |      dkD  }|r#t        d |D              dz   }|t        ||      }dg}|r|j                  d       |g dz  }||j                         nd}|
r"|j                  d| d| d| d| dg       |r1|j                  ddg       |r|r|j                  | dd| dg       |j                  d       t        d | D              }|r|j                  d       d .dg2dg/. g0d4./02fd!	}d" } ||       |r ||       |d#|z   d D ]
  } ||        |r|j                  d$        ||       |r|j                  d%        ||d&'       |rj| D cg c]  }|j                  dkD  s|j                    }}t        |      dk7  r1 |t        |            \  }}|j                  d(|         ||       nd)}2d   } /d   }!0d   }"d}g 11fd*}#d}$d}%| D ]  }|$|j                  z  }$|j                  t        j                   k(  r|j"                  r|%|j$                  z  }%K|j                  t        j&                  t        j(                  t        j*                  fv s|j,                  r|%|j$                  z  }% | |#d+|"z          |#|       |	r |#d+|"z          |#d,        |#|!        |# | j.                  |         |#|!       d- }&d}'| D ]<  }|'|k(  r n4|	r|j0                  |'d#z  }'|j2                  }(|t        |(      |d.z
  k\  r|(d|d.z
   d/z   }(t5        |j                  |$      |_        |j8                  st5        |j:                  |$      nd|_        |(g})|r0|j>                  }*|t        |*      |d.z
  k\  r|*d|d.z
   d/z   }*|)|*gz  })|)|j6                  |j@                  |j<                  |jB                  |jD                  gz  })|
rXt5        |j$                  |%      |_#        |)j                  |jH                  |jF                  |jJ                  |jL                  g       |rv|)j                  tO        |jP                        tO        |jR                        g       |r;|r9|)j                  tO        |jT                        tO        |jV                        g       |)j                  |jX                         |r|)j                  |jZ                         |r'|)j                  t]        |j^                        d|        |rA|j                  dk  r|)j                  d0       n |)j                  |j                  z  d1       |rAd}+t        |j                        dkD  r |&|j                  d   |      }+|)j                  |+        |# | j.                  |)        |sdgt        |      d#z
  z  },|j                  d#d D ]"  }- |# | j.                  |, |&|-|      gz           $ |,j                  d        |# | j.                  |,        ?  |#|!        |#d2ta        |$              |
r) |#d||j                         nd d3ta        |%              djc                  1      S c c}w c c}w )5zUPrint a summary of events (which can be a list of FunctionEvent or FunctionEventAvg).r   r   c              3   :   K   | ]  }|j                   d kD    ywr   Nr7  rn   s     r"   rp   z_build_table.<locals>.<genexpr>F  s     Ou%66:O   c              3   :   K   | ]  }|j                   d kD    ywr|  r!  rn   s     r"   rp   z_build_table.<locals>.<genexpr>G  s     P77!;Pr}  z9use_device is None, but there is device performance data.c              3   l   K   | ],  }|j                   d uxr t        |j                         dkD   . y wr   )r   r2   rn   s     r"   rp   z_build_table.<locals>.<genexpr>O  s;       
		4	'	GC0B0B,Ca,G	G   24c              3   l   K   | ],  }|j                   d uxr t        |j                         dkD   . y wr   )r   r2   rn   s     r"   rp   z_build_table.<locals>.<genexpr>T  s;       
		D	(	IS1D1D-E-I	Ir  Nc                 |    t        | j                  dd      j                  dd      j                  dd            S )Nr}   r   r   r   )r   r   )rW   rt   s    r"   rH   z_build_table.<locals>.<lambda>]  s5    OOFH5WUH-W]H5	! r#   T)rC   reverser   c              3   F   K   | ]  }t        |j                          y wr+   )r2   rC   ro   rW   s     r"   rp   z_build_table.<locals>.<genexpr>j  s     ;SCL;s   !   c              3   X   K   | ]"  }t        t        |j                               $ y wr+   )r2   r   r   r  s     r"   rp   z_build_table.<locals>.<genexpr>n  s      KSc#c&6&6"78Ks   (*   c              3   @   K   | ]  }t        d  |D                yw)c              3   2   K   | ]  }t        |        y wr+   r2   )ro   r   s     r"   rp   z)_build_table.<locals>.<genexpr>.<genexpr>|  s     25CJ2s   N)max)ro   re   s     r"   rp   z_build_table.<locals>.<genexpr>|  s     Gu2E22Gs   NamezOverload Name)z
Self CPU %zSelf CPUzCPU total %z	CPU totalzCPU time avgNonezSelf z %z totalz	 time avgzCPU MemzSelf CPU Memz Memz
# of Callsc              3   :   K   | ]  }|j                   d k7    yw)rM   N)rE   r  s     r"   rp   z_build_table.<locals>.<genexpr>  s     =s*=r}  zNode IDr   c                     dxx   d|z   t        |       z   dz   dz  z   z  cc<   dxx   d| z  dz  z   z  cc<   dxx   | z   z  cc<   y )Nr   z{: }r   -)r   )paddingtext_dirSPACING_SIZEheader_sep_lstline_length_lstrow_format_lsts     r"   
add_columnz _build_table.<locals>.add_column  sh    qHs7|+c1S<5GH	
 	qS7]cL.@AAg44r#   c                 $   g d}| dkD  sJ t        dt        t        j                  |       dz  t	        t        |      dz
                    }|dk\  r|t        |      k  sJ t        dt        j                  |      dz        |t        |         fS )N)FLOPsKFLOPsMFLOPsGFLOPsTFLOPsPFLOPsr      r/   
   g      )	r  minmathlog10floatr2   powfloorr   )r  flop_headers	log_flopss      r"   auto_scale_flopsz&_build_table.<locals>.auto_scale_flops  s    
 qyy3tzz%014eC<MPQ<Q6RST	A~)c,.?"???BI.57c)n9UVVr#   r/   zInput ShapeszSource Location<)r  zTotal Fc                 J    j                  |        j                  d       y )Nr   )rV   )r   results    r"   rV   z_build_table.<locals>.append  s    adr#   =z1This report only display top-level ops statisticsc                 t    t        |       |kD  r)t        |       |z
  }| |d  } t        |       dkD  rd| dd  z   } | S )Nr  ...r  )r   src_column_widthoffsets      r"   	trim_pathz_build_table.<locals>.trim_path  sI    t9''Y!11F=D4y1}tABx'r#   r  r  z--z8.3fzSelf CPU time total: z time total: )r?  )2r2   anyr   RuntimeErrorr
   rQ   r  r  re   rV   upperr:   r  rm   rO   r	   rP   r   r   r/  r0  r1  r   r   r3   rC   r   r  rN   r   r  r   r   r   r   r  r   r   r   r   r	  r  r
  r  r   rE   r   r   r   r@  )3rY   rt   ry   ru   rv   rw   rx   r   r   rz   has_device_timehas_device_memr   has_input_shapeshas_overload_namesname_column_widthshapes_column_widthDEFAULT_COLUMN_WIDTHflops_column_widthr  rW   stacks	has_stackheadersr   append_node_idr  r  r   	raw_flopsflops_scaleflops_header
row_format
header_sepline_lengthrV   sum_self_cpu_time_totalsum_self_device_time_totalr  event_limitr4   
row_valuesr   	src_fieldempty_headersr   r  r  r  r  r  s3    `                                            @@@@@r"   r{   r{   6  s    6{aOOOOPPPN%%J /VWW  
   
  	 ")!
  ;F;;a?( 13HIKFKKaO*!"57NO-#syy'<SYYRSAS		F  FaIGGG!K 	  +"#35IJhG'  G )3(>*""$FK}%}B'-v&-y)		
 	
 .NN"m4(K=- NN< =f==Ny! LTNTN$}oO5 5W  !$%Q++-. )'() ~&&'()#c2*0B3CIIMSYYB	By>Q*:3y>*J'[,NNVL>23)*J"J"J!!$KJ F  !" E3#:#::??jnn,&#*D*DD&OO&& ** '#*D*DD&E$ s[ !vs[ !BC
:
:g&'
: K b6)# S^^%?1Kww ,T>SVW>W1W50146>D1##%< 

 << s113JK 	 V
--M%1&*?!*CC -.K1F1J Lu T=/)J  ''!!""
 	

 '9**,F(C$ 22,,--''  #3#7#78"3#<#<=	 n!! 's'>'>?&s'C'CD	 	II	
 ckk*c#"2"234H5HIJyyA~!!$'!!SYY%<T$BDI399~!%ciil4DE	i( z  *-.DCL1$45M12 %J%%'9U<L+M*NN   $$:$$m45Eb6H :
"<0G#H"IJK**@J$$&fM N'(BCDF	
 776?M~ Cs   ,^,>^,^,^1*^1)F)	NNr   r   r   r   FFF)$r^  rR   r  collectionsr   r   operatorr   typingr   r   typing_extensionsr   rN  torch.autogradr	   __all__listr
   r   r   r   r   r   r   r   r   r   r   r   ro  rq  rr  rt  ry  r{   r   r#   r"   <module>r     s       /    (  %	[ [|
5"D   <	% 	% 
H<	=L
' L
^N
* N
b+ 6 6,	P  * && Fr#   