
    VhD                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dlm	c m
Z d dlmc m
Z d dlmZmZmZ d dlmZmZ d dlmZmZ  e j2                  e      Zdej                  j8                  deej                  j8                     fd	Zd
ej                  j8                  ddfdZdej                  j@                  ddfdZ!dej                  j@                  dede"e#ej                  j8                  f   de$e%ej                  j8                     e"e#ej                  j8                  f   f   fdZ&dej                  j@                  de%ej                  j8                     de"e#ej                  j8                  f   dejN                  jP                  de#dej                  j8                  fdZ)dej                  j@                  dedej                  j8                  de"e#ej                  j8                  f   de$ej                  j8                  df   ddfdZ*dej                  j@                  de"e#ejN                  jP                  f   de"e#ef   dej                  j@                  fdZ+dej                  j@                  deddfdZ,dede"e#ejN                  jP                  f   dej                  j@                  fdZ-y)     N)defaultdict)Optional)ConstantArgumentExportedProgramModuleCallSignature)legalize_graphNodeList)erase_nodesfuse_as_graphmodulenodereturnc                 d   t        | j                  j                               }t               }|D ]}  }|j                  dk(  r|j                  dk(  r|j
                  t        j                  k(  sJ d|  d|        |j                  t        |j                  j                                       |S )Noutputcall_functionz"Expected getitem node as user for z, instead got )	listuserskeyssetoptargetoperatorgetitemupdate)r   
node_usersgetitem_usersusers       B/home/dcms/DCMS/lib/python3.12/site-packages/torch/export/_swap.py_get_getitem_usersr      s    djjoo'(JEM 677h GG&4;;(:J:J+J	K/v^D6J	KJT$**//"3456     curr_module_nodec                    t         j                  d|        t        | j                  j	                               }t        |      dk(  sJ dt        |              |d   }|j                  dk(  r|j                  t        j                  k(  sJ t        |      }t        |      dk7  rt         j                  d||       yt        t        |            }|j                  dk(  r|j                  t        j                  k(  st         j                  d||       yt        |j                   d         D ]  \  }}||j                  vr&t         j                  d	| |j                   d           y|j                  dk(  r0|j                  t"        j$                  k(  r|j                   d   |k(  ryt         j                  d
| |j                   d           y t'               }t        |      }|D ]4  }	|j)                  t        |	j                  j	                                      6 t        |      dk7  rt         j                  d||       yt        t        |            }
|
j                  dk(  st         j                  d||
       y| f|
_        y)a9  
    We want to try to remove extraneous pytree flatten/unflatten calls between modules
    calls. Instead of having the following:
    graph():
        ...
        %foo : [num_users=1] = call_module[target=foo](args = (%getitem_1, %getitem_2), kwargs = {})
        %tree_flatten_spec : [num_users=1] = call_function[target=torch.fx._pytree.tree_flatten_spec](args = (%foo, %_spec_1), kwargs = {})
        %getitem_4 : [num_users=1] = call_function[target=operator.getitem](args = (%tree_flatten_spec, 0), kwargs = {})
        %tree_unflatten_1 : [num_users=2] = call_function[target=torch.utils._pytree.tree_unflatten](args = ([%getitem_4], %_spec_2), kwargs = {})
        %getitem_5 : [num_users=1] = call_function[target=operator.getitem](args = (%tree_unflatten_1, 0), kwargs = {})
        %getitem_7 : [num_users=0] = call_function[target=operator.getitem](args = (%tree_unflatten_1, 1), kwargs = {})
        %getitem_6 : [num_users=1] = call_function[target=operator.getitem](args = (%getitem_5, 0), kwargs = {})
        %bar : [num_users=1] = call_module[target=bar](args = (%getitem_6,), kwargs = {})
        ...

    We could do the following, if we know that all the outputs of `foo` feed into `bar`:
    graph():
        ...
        %foo : [num_users=1] = call_module[target=foo](args = (%getitem_1, %getitem_2), kwargs = {})
        %bar : [num_users=1] = call_module[target=bar](args = (%getitem_6,), kwargs = {})
        ...

    Currently this optimization only works for the case where all of the outputs
    of `foo` go directly into `bar`, and `bar` has no other inputs.
    z+Trying to remove pytrees for module call %s   z4Expected only one user for module node, instead got r   r   zaMore than one user found for flatten node, %s: %s. Unable to fuse it with another unflatten call.NzTFlatten node %s's user is not a pytree.tree_unflatten. Instead it is: %s. Passing...zModule %s's outputs are not all directly used as inputs to the subsequent module. Unable to fuse the connecting flatten/unflatten. The inputs to the subsequent module are: %s. zModule %s's outputs are not all directly used in the same order as outputted. Unable to fuse the connecting flatten/unflatten. The inputs to the subsequent module are: %s. zaMore than one user found for unflatten node, %s: %s. Unable to fuse it with another flatten call.call_modulezLUnflatten node %s's user is not a call_module. Instead it is: %s. Passing...)logdebugr   r   r   lenr   r   	fx_pytreetree_flatten_specr   nextiterpytreetree_unflatten	enumerateargsr   r   r   r   )r    curr_module_usersflatten_nodeflatten_getitem_usersunflatten_nodeiargunflatten_getitem_getitem_usersunflatten_getitem_usersunflatten_getitem_usernext_module_nodes              r   _try_remove_connecting_pytreesr9   $   s   6 II;=MN-3388:;!#X	=dCT>U=VWX#$Q'L?*9#>#>>	? /|<
 !Q&		=!		
 	$456N_,!!V%:%::		,		
 	N//23 3l(((IIS !##A&  FFo%

h...q II. !##A& 16 '*e#0@"9 
'..'--2245	


 *+q0		;+		
 	D!@AB=0		,		
 	 ./r   gmc                     | j                   j                  D ]  }|j                  dk(  st        |        | j                   j	                          y)a)  
    Remove extraneous pytree flatten/unflatten calls.

    We try a couple of optimizations here:
        1. Remove pytree flatten/unflatten calls between modules
        2. TODO: Remove module's in_spec + initial unflatten call
        3. TODO: Remove module's out_spec + final flatten call
    r#   N)graphnodesr   r9   eliminate_dead_code)r:   r   s     r   _remove_extraneous_pytreesr?      sA      177m#*401 HH  "r   	signaturenode_name_mapc           	         g }|j                   D ]n  }t        |t              r|j                  |j	                  d        1|j
                  |vr|j	                  d        Q|j	                  ||j
                            p ddlm}  || ||j                        }|j                  j                  dk(  sJ |j                  j                  d   }|j                  J | j                  j                  t        j                  |df      }t!        |j                        D 	cg c].  }	| j                  j                  t        j                  ||	f      0 }
}	|j                  j                  d   }|j                  J | j                  j                  t        j                  |df      }|j                  D ci c]/  }|| j                  j                  t        j                  ||f      1 }}|
|fS c c}	w c c}w )Nr"   )_generate_unflatten   r   )inputs
isinstancer   valueappendname	unflattenrC   in_specnum_childrenchildren_specscontextr<   r   r   r   range)r:   r@   rA   tree_unflatten_argsinput_rC   r2   	args_spec	args_noder3   
args_nodeskwargs_speckwargs_nodekkwargs_nodess                  r   _construct_inputsrY      s   
 :<"" 	Cf./FLL4H  &&t,[[-&&t,&&}V[['AB	C /(-@)BSBSTN))Q...!!003I$$$&&x'7'7.!9LMI y--. 	x//)Q@J 
 ##2215K***(((()9)9NA;NOK $$ 	
288!!("2"2[!4DEEL  |##s   3G/54G4rT   rX   module_to_swaprI   c                     ddl m}m}  ||| ||j                         | j                  j                  |t        |      |      }|S )Nr"   )_assign_attr	_AttrKind)rJ   r\   r]   MODULEr<   r#   tuple)r:   rT   rX   rZ   rI   r\   r]   module_nodes           r   _insert_call_modulera      s>     3T9+;+;<((&&tU:->MKr   r`   orig_outputs.c                     ddl m}  || ||j                        }t        |      D ]S  \  }}t        j
                  j                  |      |   j                  }	|j                  |	d       |	||j                  <   U y )Nr"   )_generate_flatten_specT)propagate_meta)
rJ   rd   out_specr-   torchfxProxyr   replace_all_uses_withrI   )
r:   r@   r`   rA   rb   rd   r0   r3   orig_output	proxy_outs
             r   _deconstruct_outputsrm      ss     2)"k9;M;MNL#L1 4;HHNN<0388	)))D)I*3k&&'4r   modules_to_swapmodule_call_graphc                    t         j                  d       t         j                  | j                         t        |        t	        t
              }| j                  j                  D ci c]  }|j                  | }}| j                  j                  D ]R  }|j                  j                  d      x}s!|j                         D ]  \  }}||v s||   j                  |        R T |j                         D ]  \  }	}
	 |	j                  dd      }t        | |
d|       \  }}}t         j                  d       t         j                  |j                         ||	   }t        | ||      \  }}t!        | ||||	   |	      }t#        | ||||       t%        | |
       t         j                  d       t         j                  | j                          t        |        t         j                  d       t         j                  | j                         t'        |        t         j                  d	       t         j                  | j                         | j)                          | S c c}w )
NzStarting graph:nn_module_stack._fused_zFused subgraph nodes:zSwapped graph:z#Before removing extraneous pytrees:z"After removing extraneous pytrees:)r$   r%   r<   r   r   r   r=   rI   metagetvaluesrH   itemsreplacer   rY   ra   rm   r
   r?   	recompile)r:   rn   ro   
partitionsr   rA   rq   pathrs   rI   r=   submod_namesub_gmorig_inputsrb   r@   rT   rX   r`   s                      r   _swap_module_helperr      s   
 II IIbhh2&1$&7J %'HHNN/ 		4/M /
  "iimm,=>>?>*113 a?*t$++D1 "'') >e(	T ll3,,?}--
)\ 			)*		&,,):4)@	#4RM#R 
L)
L/$*?
 	RKUB		"#		"((}>@ 2II34IIbhhr"II23IIbhhLLNIo/s   'I!c                 z   ddl m}m} t        j                  j
                  j                         | j
                  _        | j
                  j                  D cg c]  }|j                  dk(  s| }}g }|j                  }|g }|j                  j                  dk(  sJ |j                  j                  d   }|j                  j                  d   }	|j                  t        k(  sJ |	j                  t         k(  sJ t#        |j                        D ]  }
|j%                  d|
         |j'                  |	j(                         |D ]S  }| j
                  j+                  |d         5  |j%                  | j
                  j-                  |             ddd       U | j
                  j+                  |d         5   || t        |            }t/        |      D ]*  \  }
}d|_	        t0        j2                  |_        ||
f|_        , 	 ddd       t9        d	 | j
                  j                  D              }| j
                  j+                  |      5   || |j6                  d   |j:                        }|f|_        ddd       | j=                          yc c}w # 1 sw Y   [xY w# 1 sw Y   xY w# 1 sw Y   8xY w)
a`  
    Given the unlifted module from calling ep.module(), we want to remove the
    pytree processing from the graph module's PyTreeCodeGen and instead make it
    nodes inside of the graph. This allows us to do some optimizations, like
    remove these pytree calls if it is unnecessary, and makes the PyTree part
    more obvious to graph passes.
    r   )_generate_flattenrC   placeholderNrD   r"   arg_r   c              3   @   K   | ]  }|j                   d k(  s|  yw)r   N)r   ).0r   s     r   	<genexpr>z._fix_input_output_signature.<locals>.<genexpr>  s     N$''X:MtNs   )torch.export.unflattenr   rC   rg   rh   r<   CodeGen_codegenr=   r   forward_arg_namesrK   rL   rM   typer_   dictrO   rH   extendrN   inserting_beforer   r-   r   r   r   r.   r)   rf   rz   )r:   r@   r   rC   r   old_placeholdersnew_placeholdersr   arg_spec
kwarg_specr3   r4   	flat_nodeold_placeholderoutput_nodeunflats                   r   _fix_input_output_signaturer   ]  so    N ..0BHH)+T477m;STT!33   --222$$33A6&&55a8
}}%%%$&&&x,,- 	1A$$tA3Z0	1  !3!34  ?XX&&'7':; 	?##BHH$8$8$=>	? 	??
 
	"	"#3A#6	7 2%b%0@*AB	"+,<"= 	2A!0O%-%5%5O"$-q>O 	22 NNNK		"	";	/ %$R)9)9!)<i>P>PQ"9% LLNC U 	? 	?2 2% %s1   J-J(+J;AJ%*J1J"	%J.1J:epc                    | j                   D ci c]&  }|j                  s|j                  |j                  ( }}| j                         }d|_        |j
                  j                          t        |t        j                  j                        sJ t        || j                   d   j                         | j                   |_         t        j                  t        |      j                  |      |_        t        j                  t        |      j                   |      |_        t        |t        j                  j                        sJ t#        |||      }|S c c}w )ab  
    Unlifts the given ExportedProgram into a fx.GraphModule, and then swaps
    previously traced modules with new eager modules specified. Returns a
    fx.GraphModule with a custom forward function.

    Args:
        ep (ExportedProgram): Exported program to modify
        modules_to_swap (Dict[str, torch.nn.Module]): Mapping from module fqn to
            eager module to swap with. The specified module fqn should have also
            been specified in the `preserve_module_call_signature` argument to
            torch.export so that we know how to restore the calling convention
            to this argument.
        run_with_interpreter: Whether or not to run the graph using
            fx.Interpreter. Setting to true will help result in better error
            messages and easier debugging, but it has found to result in a QPS
            drop.
    Fr   )ro   r@   fqnmodulevalidate_inputsr<   r>   rF   rg   rh   GraphModuler   types
MethodTyper   trainevalr   )r   rn   entryro   r:   s        r   _swap_modulesr     s   * 130D0D',		5??"  
BBHH  "b%((..///B$8$8$;$E$EF//BR3BHtBx}}b1BGb%((..///	R2C	DBI#s
   EE).loggingr   r   collectionsr   typingr   rg   torch.fx._pytreerh   _pytreer'   torch.utils._pytreeutilsr+   torch.export.exported_programr   r   r   torch.fx.passes.tools_commonr   r	   !torch.fx.passes.utils.fuser_utilsr
   r   	getLogger__name__r$   Noder   r   r9   r   r?   r   strr_   r   rY   nnModulera   rm   r   r   r    r   r   <module>r      s      #   $ $ $ $ 
 B N g!UXX]] s588==/A t0UXX]] t0t t0n#588#7#7 #D #"'$'$"'$ UXX]]*+'$ 4S%((--%7 889	'$TUXX]]# sEHHMM)* HHOO	
  XX]]44"4 4 UXX]]*+	4
 s*+4 
4&cc#uxx./c C!445c XX	cL11)<1	1h%%*.sEHHOO/C*D%
XX%r   