
    Vh9,                        d dl Z d dlZd dlmZ d dlmZ d dlmZmZ d dl	m
Z
 	 d dlmZmZmZmZmZmZmZmZmZmZ  e j0                  e      Zej7                  e j8                         	 	 	 dded	ed
edeee       deee       de!e"e ef   eef   fdZ# G d de      Z$de
d	edee   fdZ%y# e$ rZ ed      edZ[ww xY w)    N)IntEnum)Optional)Graphis_submodule)SACStats)
lpDot	LpInteger
LpMaximize
LpMinimize	LpProblemLpStatuslpSum
LpVariablePULP_CBC_CMDvaluezBPlease install pulp package. See: https://github.com/coin-or/pulp.graphmemory_budget
world_sizeac_units
fsdp_unitsreturnc                    % t         j                        }d}d}t        dt              }t	        j
                  dt        t        |            ddt              }	t	        j
                  dt        t        |            dd      }
t	        j
                  dt        t        |            d      }t	        j
                  d	t        t        |            d      }t	        j
                  d
t        t        |            d      }t	        j
                  dt        t        |            d      }t	        j
                  dt        t        |            d      }t	        dd      }|r;t        |      }t        |      D ]"  % j                  %   d   |vs||	%   dk(  z  }$ |r1t        |      D ]#  %t         %fd|D              s||	%   dk(  z  }% t        |      D ]=  %t        %dz   |      D ])  } j                  %   |   dk(  s||	%   |	|   z   dk  z  }+ ? t        |      D ]   % j                  %   d   s||	%   dk(  z  }" t        |      D ]?  % j                  %   d   s j                  %   d    j                  %   d   k  rt        j                  d j                  %   d          t        j                  d j                  %   d          t        j                  d j                  %   d          t        j                  d       t        j                  d        j                  %   d    j                  %   d<    j                  %   d   |z  } j                  %   d   |z  }||%   ||
%   z  ||z
  |	%   z  z
  k(  z  }B t        |      D ]d  %||	%   |
%   k\  z  } j                  %   d   r$ j                  %   d   |z  } j                  %   d   |z  }||
%   ||z
  |z  |	%   z  k\  z  }f t        |      D ]  % j                  %   d   |z  } j                  %   d   |z  } j                  %   d   }dg|z  }t        |      D ]&  } j                   j                  |      d   }d||<   ( ||%   ||z   t!        ||      z
  k(  z  }  j                  d   d   |z  }t        |      D ].  % j                  %   d   |z  }||%   |%   ||z   |z  z   k(  z  }0 t        |      D ]  %|||%   k\  z  } t        |      D ]`  %t         j                  %   d         D ]@  } j                  %   d   |   } j                  %   d    |   }||%   ||
%   z  |z   k\  z  }B b t        |      D ]S  % j                  %   d!   }||%   ||	%   z  k  z  }||%   ||%   z  k  z  }||%   ||%   z  |d|	%   z
  z  z
  k\  z  }U |||k  z  }|t#        |      z  }t%        d"d#d$      } |j'                  |       }!|!dk7  r"t        j)                  d%t*        |!          i dd&fS i }"t        |      D ]J  %t-        |	%   j.                        dk(  st-        |
%   j.                  d'      |" j                  %   d   <   L t-        t1        |j2                        d(      }#t-        |j.                  |z        }$|"|#|$fS ))a6  
    MILP to decide which modules to AC and how much memory to discard.
    The objective is to minimize recomputation time.
    The constraint is to ensure peak memory is under budget.

    Args:
        graph: graph representation of the model as a module submodule tree
            where each node is a submodule with memory & runtime stats
        memory_budget: memory budget in GiB
        world_size: number of GPUs. In the case of FSDP, world_size will be
            used to compute the amount of parameter and gradient memory on each rank
        ac_units: a list of user-specified AC units.
        fsdp_units: a list of FSDP units. AC units cannot be supermodules of FSDP units.

    Returns:
        Dict[str, float]: the optimal SAC solution, mapping from module fqn to
            the percentage of activation memory to **discard**
        float: the recomputation time of the optimal SAC solution
        int: upper bound on the peak memory of the optimal SAC solution.
            note that value of -1 means that the ILP solver failed to find a solution.

    d   i   @SACyr      rdamrcprctmax_mfqnc              3   V   K   | ]   }t        |j                     d           " yw)r$   N)r   nodes).0	fsdp_unitr   is     P/home/dcms/DCMS/lib/python3.12/site-packages/torch/distributed/_tools/sac_ilp.py	<genexpr>zsac_milp.<locals>.<genexpr>d   s-       YAu(=>s   &)is_leaf
sac_memoryact_fw_per_modulezFor module {%s}: z.activation memory from memory tracker is {%d},z-activation memory from SAC estimator is {%d}.z!Something is wrong. Please check!z&Overriding the latter with the former.act_grad_per_module	act_totalpos_fw_post_orderindexparam_per_module
grad_total
n_segmentsslopes
interceptssac_runtime皙?   gapRel	timeLimitmsg$Solver failed to find a solution: %s      )lenr&   r   r   r   matrixlistranger	   setany	ad_matrixloggerwarning	name2nodefw_post_orderr   r   r   solveerrorr   roundvarValuer   	objective)&r   r   r   r   r   	num_nodesMMEM_MULTIPLIERprobr   r   r   r   r    r!   r"   r#   ac_units_setjACM_iIA_iAG_iTA_iposcoeffpP_1TG_isslope	interceptACT_isolverstatusac_decisionsrecomputation_timepeak_memr)   s&   `                                    @r*   sac_milprk   #   sj   : EKK IAN UJ'D 	#tE)$45q!YGA#tE)$45q!<A#tE)$45q9A#tE)$45q9A#tE)$45q9A


E4i(8#91
=C


E4i(8#91
=Cw"E 8}y! 	"A{{1~e$L8!	!	"
 y! 	"A !+  !	!	" 9 )q1ui( 	)Aq!!$)!qtq((	)) 9 ;;q>)$AaDAID
 9 = Ay)u{{1~0
KKN./00 NN.Au0EFNN@A23 NN?A|, NN>?NNCD+0;;q>:M+NEKKN<(A|,~={{1~12^C!!!'<<<</=< 9 6!!;;q>)$A|,~={{1~12^C!.15556 9 	6{{1~34~E{{1~k*^;kk!n01is 	A 3 3A 67@AE!H	 	!teE1o555	6 ++a.+
,~
=C9 9{{1~l+n<!!d
j88889
 9 1 9 7u{{1~l34 	7AKKN8,Q/EA|4Q7ICFeadlY666D	77 9 :A}-A!ad(""A%#a&.((A%#a&.1AaD>999	: 	E]""D 	E#JD !<FZZF {;Xf=MN1by L9 J11$27!q2ILQ./J uT^^4a8U^^n45H+X55    c                       e Zd ZdZdZy)SACDecisionr   r   N)__name__
__module____qualname__	RECOMPUTESAVE rl   r*   rn   rn      s    IDrl   rn   	sac_statsc                    d|cxk  rdk  sn t        d| d      t        | j                        }t        dt              }t        j                  dt        t        |            ddt              }| j                  r5| j                  D ]%  }|||   t        j                  j                  k(  z  }' n<t        | j                  dd | j                  dd       D ]  \  }}|||   ||   k(  z  } | j                   D ]%  }|||   t        j"                  j                  k(  z  }' | j$                  D ]<  \  }}	||	k7  r|||   ||	   k(  z  }|||   t        j                  j                  k(  z  }> t'        j(                  |t+        | j,                        z        }
|t/        || j,                        |
k  z  }|t/        || j0                        z  }t3        d	d
d      }|j5                  |      }|dk7  rt6        j9                  dt:        |          g S t        |      D cg c]  }t=        ||   j>                         c}S c c}w )aB  
    This is adapted from --
    https://github.com/facebookresearch/xformers/blob/c6c0ac31f1b08542a0bc27278c6ed10f825f6963/xformers/checkpoint.py#L375

    Given the SACStats of a module, including list of operators, their memory, runtimes, and metadata,
    decide via MILP an optimal set of operators to checkpoint under a given ``memory_budget``.

    Args:
        sac_stats: the SACStats object of the module
        memory_budget: a float between zero and one

    Returns:
        List[int]: the decision whether each operator should be saved (1) or recomptued (0).
    r   r   z5`memory_budget` must be a float between 0 and 1. Got .zSAC-per-modulexNr@   r9   
   r;   r?   ) 
ValueErrorrC   
func_namesr   r
   r   rD   rE   rF   r	   force_store_randomrand_opsrn   rs   r   zipview_like_opsrr   inplace_opsmathceilsummemoryr   runtimesr   rN   rJ   rO   r   rP   rQ   )ru   r   num_opsrV   rx   r)   i1i2op	op_parent
max_memoryrf   rg   s                r*   +get_optimal_checkpointing_policy_per_moduler      sD   " #!#CM?RST
 	
 )&&'G %z2D 	#tE'N3Q9EA
 #### 	3AAaDK,,2222D	3 ),,Sb193E3Eab3IJ 	#FBAbEQrUN"D	# $$ 4!--33334 #.. 4I?AbEQy\))DAbE[--3333D	4 =3y/?/?+@@AJE!Y%%&*44D
 	E!Y''((D ;FZZF {;Xf=MN	 +0.9QE!A$-- 999s   H?)r   NN)&loggingr   enumr   typingr   "torch.distributed._tools.ilp_utilsr   r   &torch.distributed._tools.sac_estimatorr   pulpr   r	   r
   r   r   r   r   r   r   r   ImportErrorerr	getLoggerro   rJ   setLevelINFOfloatintrE   strtupledictrk   rn   r   rt   rl   r*   <module>r      s       B ;  $ 
		8	$   $(&*v6v6v6 v6 tCy!	v6
 c#v6 4U
UC'(v6r' 
F:F:(-F:	#YF:U  
Ls   B- -C 2	B;;C 