
    Vht'                        d dl Z d dlmZ d dlmZmZ d dlZd dlZd dl	m
Z
mZmZmZ d dlmZ d dlmZmZ  G d de      Z G d	 d
e      Z G d de      Z G d de      Zdej.                  j0                  dedededej2                  defdZ G d de      Z G d d      ZdedefdZdededefdZ dededefdZ!d%d e"d!edefd"Z#d#ede$e"e%f   fd$Z&y)&    N)OrderedDict)cast	TypedDict)_MemRefType_ModMemStats	_ModState
MemTracker)RuntimeEstimator)SACEstimatorSACTradeOffStatsc                   N    e Zd ZU ee   ed<   ee   ed<   ee   ed<   ee   ed<   y)ModOrderfw_pre_orderbw_pre_orderfw_post_orderbw_post_orderN)__name__
__module____qualname__liststr__annotations__     R/home/dcms/DCMS/lib/python3.12/site-packages/torch/distributed/_tools/ilp_utils.pyr   r      s*    s)s)99r   r   c                   "    e Zd ZU eed<   eed<   y)
ModRuntimefwbwN)r   r   r   floatr   r   r   r   r   r      s    IIr   r   c                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   eed<   ee   ed<   ee   ed<   ee   ed<   e	eef   ed<   y)ModStatsfqnparam_per_modulegrad_per_module
grad_totalact_fw_per_moduleact_bw_per_moduleact_grad_per_module	act_totalinput_per_moduleoutput_per_modulefw_runtime_per_modulebw_runtime_per_moduleis_leafsac_runtime
sac_memory
n_segmentsslopes
interceptsbreakpointstradeoff_curveN)
r   r   r   r   r   intr    boolr   r   r   r   r   r"   r"      s    	HO N    MOOKUeu--r   r"   c                   (    e Zd ZU eed<   ee   ed<   y)
ModuleInfo	mod_order	mod_statsN)r   r   r   r   r   r   r"   r   r   r   r:   r:   I   s    H~r   r:   modelmem_trackerruntime_estimatorsac_estimatordevreturnc           	         t        t        j                  |j                              }|j                  j                         D ci c]  \  }}||d   |d   d }}}t        |j                        t        |j                        t        |j                        t        |j                        d}	|j                          t        j                  |j                        }
|	g d}| j                         D ]  }|j                  |d      x}s|
j                  |j                  d      x}rW|j                   }|j"                  }|j$                  }|j&                  }|j(                  }|j*                  }|j,                  }d}ndx}x}}g x}x}}t/               }d	}i d
|j                  d|j0                  d|j0                  d|j2                  t4        j6                     d   |   t8        j:                     dt=        d|j2                  t4        j>                     d   |   t8        j@                     |j2                  t4        jB                     d   |   t8        j@                     z
  |jD                  z
        dt=        d|j2                  t4        jF                     d   |   t8        j@                           d|j2                  t4        jF                     d   |   t8        jH                     |j2                  t4        j6                     d   |   t8        jH                     z
  d|j2                  t4        j>                     d   |   t8        j@                     d|jJ                  d|jD                  d||j                     d   d||j                     d   d|d|d|d|d||||d}|d   jM                  |        |S c c}}w )a  
    Collect modulewise stats for a given model, including memory, runtime, and AC tradeoff stats.

    Args:
        model: nn.Module object
        runtime_estimator: RuntimeEstimator object with runtime stats
        mem_tracker: MemTracker object with memory stats
        sac_estimator: SACEstimator object with AC tradeoff stats
        dev: device the model was run on (used to extract memory stats from MemTracker)

    Returns:
        ModuleInfo: A dictionary with module order and module stats.
    r   r   )r   r   )r   r   r   r   )r;   r<   NFr   Tr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   )r4   r5   r6   r<   )'dictcopydeepcopymemory_trackingmod_runtimesitemsr   mod_fw_pre_ordermod_bw_pre_ordermod_fw_post_ordermod_bw_post_orderpwlf_sac_tradeoff_curvesac_mod_tradeoff_statsmodulesgetmod_fqnr0   r1   r2   r3   r4   
fit_breaksr6   r   parameter_mem	snapshotsr   PRE_BWr   GRADmaxPOST_FWACTPRE_FW
output_memPEAK_BWTEMP	input_memappend)r=   r>   r?   r@   rA   mod_mem_statsr#   vmod_runtime_statsr;   mod_sac_tradeoff_statsmodule_infomodmod_mem_stattradeoff_statsr0   r1   r2   r3   r4   r5   r6   r/   mod_stats                           r   aggregate_statsrk   N   s9   , :>k112:M (44::<0C 	AdG1T7++0 0 .??@.??@/AAB/AAB	I ))+:>--,,;
 K
 }} 76(,,S$77<7!7!;!;L<P<PRV!WW~W,88+66
+66
'..+66
,77!/!>!>8999j:4666k<GM&"|++&""L$>$>&" "<#=#=&" l44Y5E5EFrJ3O$$	&" $S **9+<+<=bA#F{W",,Y-=-=>rB3GXY"--.&&" $S **9+<+<=bA#F{W&&"" & **9+<+<=bA#F{GWGWX",,Y-=-=>rB3G#((%&". \33I4E4EFrJ3OOO/&"4 #L$:$:5&"6 $\%<%<7&"8 ():<;O;O)PQU)V9&": ():<;O;O)PQU)V;&"< 7=&"> {?&"@ jA&"B jC&"D &E&"F )*"0K&"HN $++H5o76r c0s   Oc                   "    e Zd ZU eed<   eed<   y)Nodeindexpos_fw_post_orderN)r   r   r   r7   r   r   r   r   rm   rm      s    Jr   rm   c                   ,    e Zd ZdeddfdZdeddfdZy)GraphnrB   Nc                 f    g | _         i | _        t        j                  ||f      | _        g | _        y )N)nodes	name2nodenpzeros	ad_matrixr   )selfrr   s     r   __init__zGraph.__init__   s,    !#
*,1a&)(*r   nodec                 ^    | j                   j                  |       || j                  |d   <   y Nr#   )rt   ra   ru   )ry   r{   s     r   add_nodezGraph.add_node   s&    

$&*tE{#r   )r   r   r   r7   rz   rm   r~   r   r   r   rq   rq      s(    +# +$ ++T +d +r   rq   rf   c                 6   | d   }| d   d   t        |      t              k(  sJ t        |      }t        |      }| d   d   |_        t        |fd      | d<   t	        |      D ]L  \  }}t        t        |      }||d<   |j                  j                  |d         |d	<   |j                  |       N t        |      D ]S  }t        ||      D ]B  }t        |j                  |   d   |j                  |   d         rd
|j                  |   |<   B S U |S )z
    Parse module info and create a graph (tree) of modules. The graph will be
    used by MILP solver to find optimal SAC and/or FSDP configurations.
    r<   r;   r   r   c                 ,    j                  | d         S r}   )rn   )xr   s    r   <lambda>z#parse_module_info.<locals>.<lambda>   s    !3!3AeH!= r   )keyrn   r#   ro      )lenrq   r   sorted	enumerater   rm   rn   r~   rangeis_self_or_submodulert   rx   )	rf   r<   n_nodesgione_mod_statsr{   jr   s	           @r   parse_module_infor      s6   
 K(I{+N;Ly>S....)nG 	gA!+.?AO  &= K &i0 =$.W$%OO$9$9$u+$F !	

4	 7^ q'" 	A#AGGAJu$5qwwqz%7HI$%Aq!		 Hr   name_descendantname_ancestorc                     | |k(  xs |dz   | v S )z[
    check if name_descendant is a submodule of name_ancestor, or if they are the same
    .r   r   r   s     r   r   r      s     m+U}s/Bo/UUr   c                     |dz   | v S )zN
    if name_descendant is a submodule of name_ancestor, but not the same
    r   r   r   s     r   is_submoduler      s     3/11r   bunitc                 b    |dk(  r	| dz  ddS |dk(  r	| dz  ddS |dk(  r	| d	z  dd
S | ddS )zN
    return a string that represent the number of bytes in a desired unit
    KiBi   z.2fz KiBMiBi   z MiBGiBi   @z GiBz bytesr   )r   r   s     r   display_bytesr      sa     u}e)C%%u}e)C%%u}e)C%%WFr   graphc                 \   | j                   d   d   }t        | j                         }d}t        |      D ]M  }| j                   |   d   }| j                   |   d   }| j                   |   d   }t        |||z   |z   |z         }O | j                   d   d   | j                   d   d   z   }||fS )aV  
    Get the baseline peak memory and runtime.
    Baseline here means there is no FSDP or AC.
    Memory includes the parameters, gradients, activations, and activation gradients.
    Memory does not include e.g., optimizer states, embedding tables, etc.

    Returns:
        int: peak memory in bytes
        float: compute time in ms
    r   r$   r&   r)   r*   r-   r.   )rt   r   r   rY   )	r   P_1	num_nodespeak_memr   TG_iAG_iTA_icompute_times	            r    get_peak_memory_runtime_baseliner     s     ++a.+
,CEKK IH9 ;{{1~l+{{1~34{{1~k*xtd!2T!9:	; 	A./
++a.0
1	2  l##r   )r   )'rF   collectionsr   typingr   r   numpyrv   torch$torch.distributed._tools.mem_trackerr   r   r   r	   *torch.distributed._tools.runtime_estimatorr
   &torch.distributed._tools.sac_estimatorr   r   r   r   r"   r:   nnModuledevicerk   rm   rq   r   r   r8   r   r   r7   r   tupler    r   r   r   r   <module>r      sA    # "    H Qy  
(.y (.V 
l88??ll (l  	l
 
l l^8 
	+ 	+!: !% !HV# Vc Vd V2# 2c 2d 2
S 
 
 
$E $eCJ6G $r   