
    Vh-                         d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZ erd dlmZ dZ G d	 d
e      Z G d d      Zy)    N)defaultdict)Sequence)chain)AnyCallableno_type_checkTYPE_CHECKING)TorchDispatchMode)RemovableHandleg      0Ac                        e Zd ZdZddZddZy)MemoryProfileDispatchModezCRun in ``TorchDispatchMode`` to get memory stats at operator level.Nc                     || _         y )N)memory_tracker)selfr   s     W/home/dcms/DCMS/lib/python3.12/site-packages/torch/distributed/_tools/memory_tracker.py__init__z"MemoryProfileDispatchMode.__init__   s
    ,    c                     ||i |}|t         j                  j                  j                  j                  k(  r|S | j
                  j                  dz   |j                  z   dz   t        | j
                  j                  |j                           z   }| j
                  j                  |j                     dz   | j
                  j                  |j                  <   | j
                  j                  |       |S )N._   )torchopsatendetachdefaultr   _cur_module_name__name__str_operator_names_record_memory_stats)r   functypesargskwargsrs	func_names          r   __torch_dispatch__z,MemoryProfileDispatchMode.__torch_dispatch__   s    4"6"599>>((000I00mm  $%%55dmmDE	F 	 //>B 	++DMM: 	00;	r   returnN).N)r   
__module____qualname____doc__r   r(    r   r   r   r      s    M-r   r   c                      e Zd ZdZddZedej                  ddfd       Zedd       Z	edde
ddfd	       Zedd
eddfd       Zd
eddfdZd
eddfdZdedefdZdedefdZdedefdZededdfd       ZdeddfdZddZy)MemoryTrackera  
    Collect and plot the memory stats at operator level.

    Includes ``memories_allocated``, ``memories_active`` and ``memories_reserved``.
    It also prints a summary for the top 20 operators that generate the most memories.

    Example usage:

        >>> # xdoctest: +SKIP(failing)
        >>> net.cuda()
        >>> input = input.cuda()

        >>> mem_tracker = MemoryTracker()
        >>> mem_tracker.start_monitor(net)

        >>> net.zero_grad(True)
        >>> loss = net(input)
        >>> if isinstance(loss, dict):
        >>>    loss = loss['out']
        >>> loss.sum().backward()
        >>> net.zero_grad(set_to_none=True)

        >>> mem_tracker.stop()
        >>> mem_tracker.summary()
        >>> mem_tracker.show_traces()
    r*   Nc                 $   t         j                  j                  d       g | _        t	        t
              | _        t	               | _        t	               | _        t	               | _	        t	        t
              | _
        d| _        d| _        d| _        y )Nz torch.distributed.memory_tracker r   )r   _C_log_api_usage_once_hooksr   intr    memories_allocatedmemories_activememories_reserved_markersr   	_op_index_num_cuda_retriesr   s    r   r   zMemoryTracker.__init__J   sj    $$%GH-//:3/??J}<GM>Im(3C(8%'&'r   root_modulec                    | j                          |j                  dd       |j                         D ]}  \  }}||ur|j                  dd       d|v r!|j                  | j	                  |            }|j                  | j                  |            }| j                  j                  ||g        t        j                  j                          t        | dd      J t        |       | _        | j                  j                          y)z
        Register module hooks and entering ``MemoryProfileDispatchMode``.

        This enables operator level memory stats can be tracked during module runtime.
        _memory_tracker_is_rootTFz".fused_proxy_grouped_embedding_bagprofile_modeN)_clear_state__setattr__named_modulesregister_forward_pre_hook_create_pre_forward_hookregister_forward_hook_create_post_forward_hookr5   extendr   cudaempty_cachegetattrr   rA   	__enter__)r   r>   namemh1h2s         r   start_monitorzMemoryTracker.start_monitorV   s     	 94@"002 	)GD!#7?3t; ,,T-J-J4-PQB(()G)G)MNB KKBx(	) 	

 t^T2:::5d;##%r   c                 F   t         j                  j                         j                  dd      | _        | j
                  D ]  }|j                           | j
                  j                          t        | dd      J | j                  j                  ddd       d| _	        y)z
        Remove module hooks and exit ``MemoryProfileDispatchMode`` to stop tracking memory stats at operator level.

        Get some aggregated stats when the memory_tracker() is enabled, like cuda ``num_alloc_retries``.
        num_alloc_retriesr   rA   N)r   rJ   memory_statsgetr<   r5   removeclearrL   rA   __exit__)r   hs     r   stopzMemoryTracker.stopr   s     "'!8!8!:!>!>?RTU!V 	AHHJ	t^T2>>>""4t4 r   topc                    t        t              }| j                  d   \  }}t        d| j                        D ]  }| j                  |   \  }}||z
  ||<   |}  t        d       t        d| j                          t        d| d       t        |j                         t        j                  d      d      d	| D ]  \  }}t        | d
| d        t        d       y	)z
        Print out the top operators that generate the most memories.

        The number of the top operators can be configured.
        r   r   z0------------------------------------------------z The number of cuda retries are: zTop z ops that generates memory are:T)keyreverseNz: MB)r   floatr7   ranger;   printr<   sorteditemsoperator
itemgetter)	r   r\   op_diffop_nameprevious_allocated_memoryicurrent_allocated_memorykvs	            r   summaryzMemoryTracker.summary   s     %0$6-1-D-DQ-G**q$..) 	AA040G0G0J-G-7:SSGG(@%	A
 	@A01G1G0HIJSE89:7==?0C0CA0FPTUS
 	!DAq QCr!B- 	! 	@Ar   pathc                    	 dd l m	 	 fd}|dk7  r j                  |        j                  j	                         D cg c]  \  }}|	 }}} j
                  j	                         D cg c]  \  }}|	 }}} j                  j	                         D cg c]  \  }}|	 }}}t        t        t        |                  } ||t        |      t        |      t        |      gg d        ||t        |      gdg        ||t        |      gdg        ||t        |      gdg       y c c}}w c c}}w c c}}w )Nr   c                    t        t        j                  |            dz  }t        t        j                  |            dz  }	j	                          t        ||      D ]  \  }}	j                  | ||        	j                  d       	j                  d       	j                          
j                  j                         D ]?  \  }}|dk(  r	j                  ||g||gdd|	       &	j                  ||g||gd
d|	       A y )Ng+?gjt?)labelz# Operator CallszMemory (MB)fw_bw_boundaryr   )lwrs   zk-)minr   from_iterablemaxfigurezipplotxlabelylabellegendr:   re   )xy_valueslabelsmin_valmax_valyrs   marker_namemarkerpltr   s            r   _plot_figurez/MemoryTracker.show_traces.<locals>._plot_figure   s   %--h785@G%--h785@GJJL&1 ,5AU+,JJ)*JJ}%JJL'+}}':':'< #V"22HH( '*)   HH( '*)  r   r2   )allocated_memoryactive_memoryreserved_memoryr   r   r   )
matplotlib.pyplotpyplotloadr7   valuesr8   r9   listrb   len)
r   rp   r   rN   gby_1y_2y_3r   r   s
   `        @r   show_traceszMemoryTracker.show_traces   s   '	6 2:IIdO$($;$;$B$B$DEjtRrEE$($8$8$?$?$ABjtRrBB$($:$:$A$A$CDjtRrDDs3x! 	#YS	49-D	

 	Qc&8%9:Qco%67Qc&7%89 FBDs   D0.D6D<c                    | j                   | j                  | j                  | j                  | j                  d}t        |d      5 }t        j                  ||t        j                         ddd       y# 1 sw Y   yxY w)zjSave the stats using pickle during runtime if users want to plot the traces in other places like notebook.)r7   r8   r9   markersrT   wbN)	r7   r8   r9   r:   r<   openpickledumpHIGHEST_PROTOCOL)r   rp   statsfs       r   
save_statszMemoryTracker.save_stats   so     #'"9"9#33!%!7!7}}!%!7!7
 $ 	;KKq&"9"9:	; 	; 	;s   &A66A?c                     t        |d      5 }t        j                  |      }ddd       d   | _        |d   | _        |d   | _        |d   | _        |d   | _        y# 1 sw Y   <xY w)zFLoad the pickled memory stats to plot the traces or print the summary.rbNr7   r8   r9   r   rT   )r   r   r   r7   r8   r9   r:   r<   )r   rp   r   r   s       r   r   zMemoryTracker.load   sv    $ 	#KKNE	# #((<"=$%67!&':!;i(!&':!;	# 	#s   AA'rN   c                 H     dt         j                  dt        ddf fd}|S )zkPrefix operator name with current module and 'forward', and insert 'fw_start' marker at forward pass start.moduleinputsr*   Nc                 p     d_         t        | d      r| j                  rj                  d       y y y )Nz.forwardr@   fw_start)r   hasattrr@   _add_marker)r   r   rN   r   s     r   _pre_forward_hookzAMemoryTracker._create_pre_forward_hook.<locals>._pre_forward_hook   s>    '+fH$5D! 9:22  , 3 ;r   )nnModuler   )r   rN   r   s   `` r   rF   z&MemoryTracker._create_pre_forward_hook   s)    	-bii 	- 	- 	- ! r   c                      dt         j                  dt        t        j                     dt        t        j                     ddf fd}|S )zPInsert the marker 'fw_bw_boundary' at the boundary of forward and backward pass.r   r   outputsr*   Nc                 \    t        | d      r| j                  rj                  d       y y y )Nr@   rt   )r   r@   r   )r   r   r   r   s      r   _post_forward_hookzCMemoryTracker._create_post_forward_hook.<locals>._post_forward_hook   s2      9:22  !12 3 ;r   )r   r   r   r   Tensor)r   rN   r   s   `  r   rH   z'MemoryTracker._create_post_forward_hook   sJ    		3II		3U\\*		3 ell+		3 			3 "!r   c                 |     dt         j                  dt        j                  dt        j                  ddf fd}|S )zJInsert the current module name with backward prefix for the operator name.r   
grad_inputgrad_outputr*   Nc                      d_         y )Nz	.backward)r   )r   r   r   rN   r   s      r   _backward_hookz;MemoryTracker._create_backward_hook.<locals>._backward_hook  s     (,fI$6D!r   )r   r   r   r   )r   rN   r   s   `` r   _create_backward_hookz#MemoryTracker._create_backward_hook   s<    	7II	7+0<<	7FKll	7	7
 r   fn_namec                    t         j                  j                         t        z  }t         j                  j	                         t        z  }t         j                  j                         j                  dd      t        z  }||f| j                  | j                  <   ||f| j                  | j                  <   ||f| j                  | j                  <   | xj                  dz  c_        y)z
        Record current memory allocated, current memory active and current memory reserved.

        The memory stats dict is indexed with ``self._op_index``.
        zactive_bytes.all.currentr   r   N)r   rJ   memory_allocatedBYTES_PER_MBmemory_reservedrU   rV   r7   r;   r9   r8   )r   r   r   r   memory_actives        r   r!   z"MemoryTracker._record_memory_stats	  s     #(**"="="?,"N!&!;!;!=!LJJ##%))*DaH<W 	 4;<L2M/29?1Kt~~.07/GT^^,!r   r   c                 h    t        | j                  j                               }|| j                  |<   y)zSet the marker's x-axis value.N)r   r7   r   r:   )r   r   
marker_vals      r   r   zMemoryTracker._add_marker  s)    00779:
%/k"r   c                 2   | j                   j                          | j                  j                          | j                  j                          | j                  j                          | j
                  j                          d| _        d| _        d| _        y)z,Clear states when start_monitor() is called.r2   r   N)	r    rX   r7   r8   r9   r:   r   r;   r<   r=   s    r   rB   zMemoryTracker._clear_state  sq    ""$%%'""$$$& "!"r   r)   )   )r2   )r   r+   r,   r-   r   r   r   r   rR   r[   r6   ro   r   r   r   r   r   rF   rH   r   r!   r   rB   r.   r   r   r0   r0   .   s(   6
( & &t & &6 ! ! B3 B B B, .: .:T .: .:`;s ;t ;	< 	< 	<!S !X !"c "h " # (  C D   0s 0t 0
	#r   r0   )rf   r   collectionsr   collections.abcr   	itertoolsr   typingr   r   r   r	   r   torch.nnr   torch.utils._python_dispatchr
   torch.utils.hooksr   r   r   r0   r.   r   r   <module>r      sK      # $  > >   : 1  1 2z# z#r   