
    Vh$                        d Z ddlZddlZddlmZ ddlmZ ddlZddl	m
Z
 ddlmZ ddlmZmZ  ej                   e      Zd	eeef   fd
Zded	ee   fdZdej.                  j0                  dededeeeef      dee   dej.                  j6                  dedeeef   d	dfdZded	eeeeef   f   fdZdee   d	ee   fdZdede e   d	dfdZ!dej.                  j0                  ded	eeeef   ef   fdZ"dej.                  j0                  dee   d	dfdZ#dedee   d	eej.                  j0                  eeeef   ef   f   fdZ$y) a"  
This module implements graph deduplication functionality for TorchDynamo's optimization pipeline.
Graph deduplication identifies identical subgraphs in the computational graph and merges them
to reduce redundancy and improve performance. The process involves analyzing regions of the graph,
identifying structurally equivalent regions, and replacing them with a single shared implementation.
This optimization is particularly effective for models with repeated patterns or similar computational
structures across different parts of the network.
    N)IterableAny)%has_potential_input_alias_or_mutation)tree_flatten   )NodeRegionreturnc                    | j                   j                  | j                        }i }|D ]  }t        |      }|d   }t	        ||      \  }}t
        j                  j                  | j                  |      }| j                  d|      }	| j                  j                         5  | j                  j                  d|	di       }
ddd       |D ],  }t        | j                  |
|j                         |||	|       .  |S # 1 sw Y   >xY w)a  
    This is the main entry point for applying the graph deduplication pass. Deduplication occurs in two phases:
    1. Subgraph creation:
        Subgraph creation works by taking one representative region from each region group and creating a subgraph from it, which will then be used to replace all regions in the group. This is implemented by first copying all nodes of the region to the new subgraph and then finding all inputs which are not within the region and creating placeholders for them. For the outputs, all regions in a region group need to be scanned to ensure the largest set of outputs is found, and then an output node is created which returns a tuple of all outputs.

    2. Graph replacement:
        To replace each region with the extracted subgraph, the node index in the region and argument index within the node's flattened args and kwargs are recorded once during subgraph creation. This allows us to determine which (external to the region) nodes and in which order these nodes are passed as inputs. For the outputs, getitem nodes are created for each output, and all nodes in the region with external outputs are replaced by the proper getitem node. Finally, all original nodes are erased (there should be no uses of these left in the graph).

The deduplication mutates the output_graph argument in place.

Returns a mapping of nodes to their subgraph output replacement node to remap outputs
when they are created in output_graph.
    r   subgraphget_attr N)region_trackerget_identical_regionsgraph_get_all_output_indices_create_subgraphtorchfxGraphModule
nn_modulesinstall_subgraphinserting_beforecreate_node_replace_region_with_subgraphkeys)output_graphduplicated_region_groupsoutput_replacementsregion_groupinds_with_external_usersregionr   node_ind_arg_indssub_gmsubgraph_nameget_subgraph_nodes              Q/home/dcms/DCMS/lib/python3.12/site-packages/torch/_dynamo/graph_deduplication.pyapply_graph_deduplicationr)      s$   6  ,::PP  -/0 #:<#H a V%=>	
%%l&=&=xH$55j&I002 	 , 2 2 > >M2r!	 # 
	F)""!!&&((#	
	2 !	 	s   "C>>D	argsc                 <    g dt         dd ffd |        S )Nr*   r   c                     t        |       \  }}|D ]S  }t        |t              r0|j                  }|j                  }|j
                  } |||f       Cj                  |       U y N)r   
isinstanceslicestartstopstepappend)	r*   	flattened_argr0   r1   r2   flattenfully_flatteneds	          r(   r7   z%_flatten_args_kwargs.<locals>.flattena   sb    #D)	1 	,C#u%		xxxxd+,&&s+	,    r   )r*   r7   r8   s    @@r(   _flatten_args_kwargsr:   ^   s*    O	,c 	,d 	, DMr9   r   r#   r'   node_ind_arg_indr"   r%   r&   r    c           	      0   g }|D ]?  \  }	}
||	   }t        |j                  |j                  f      }|j                  ||
          A ||t	        |      f}|D cg c]  }|j
                  d    }}t        ||      rt        j                  d|       y |d   }| j                  |      5  | j                  dt        j                  j                  j                  |i       }| j                  |      5  t        |      D ]F  \  }}||   }| j                  dt         j"                  ||fi       }|||<   |j%                  |d       H 	 d d d        t'        |      D ]  }| j)                  |        	 d d d        y c c}w # 1 sw Y   9xY w# 1 sw Y   y xY w)Nexample_valuezBNYI: Failed to substitute region %s due to input alias or mutationcall_functionT)propagate_meta)r:   r*   kwargsr3   tuplemetar   logdebuginserting_afterr   r   opshigher_orderinvoke_subgraph	enumerateoperatorgetitemreplace_all_uses_withreversed
erase_node)r   r#   r'   r;   r"   r%   r&   r    sub_argsnode_indarg_indnodeflattened_args_kwargsinvoke_argsfake_inputslatest_region_nodeinvoke_subgraph_nodeindexternal_user_indsubgraph_outputs                       r(   r   r   q   s    H- 8'h 4dii5M N-g678
 %mU8_EK:BC$499_-CKC,V[A		P	
 				1	2 #$00UYY33CC[RT 
 ""#78 	Q*34L*M Q&&/0"'"3"3#X%5%58Lc7RTV# -<#D)**?4*PQ	Q V$ 	#DT"	## # D	Q 	Q	# #s,   E;*AF2AF )F F		FFc                    t               }t        |       }t        |       D ][  \  }}t        |j                  |j
                  f      }t        |      D ]'  \  }}t        |t              s||vs||vs!||f||<   ) ] |S r-   )dictsetrJ   r:   r*   rA   r.   r	   )r#   external_node_to_indicesregion_uniquerQ   rS   rT   rR   in_nodes           r(   _get_external_inputsrb      s      $vKM#F+ H$ 4dii5M N )*? @ 	HGW7D)=0#;;5=w4G(1	HH $#r9   regionsc                 R    t               }| D ]  }t        ||        t        |      S r-   )r^   _get_inds_with_external_userssorted)rc   r"   r#   s      r(   r   r      s6     *- H%f.FGH *++r9   inds_uniquec                     t        |       D ]1  \  }}|j                  D ]  }|| vs||vs|j                  |        3 y r-   )rJ   usersadd)r#   rg   rY   rS   users        r(   re   re      sI    v& )	TJJ 	)D6!k)OOC(	))r9   r   c                   	 t        |      }i }i 	|j                         D ]/  }| j                  d|j                         }|	|<   ||   }d ||<   1 dt        dt        f	fd|D ]  }| j                  |fd      }|	|<    |S )Nsubgraph_input_rS   r   c                     | v r|    S | S r-   r   )rS   region_to_subgraph_nodes    r(   map_argz-_copy_nodes_and_remap_inputs.<locals>.map_arg   s    ***400Kr9   c                      |       S r-   r   )oldrp   s    r(   <lambda>z._copy_nodes_and_remap_inputs.<locals>.<lambda>   s    WS\ r9   )rb   r   placeholdernamer	   	node_copy)
r   r#   external_inputs_to_indicesindices_to_placeholder_indrS   rt   arg_indicessubgraph_noderp   ro   s
           @@r(   _copy_nodes_and_remap_inputsr{      s     "6f!==? *//1 7**_TYYK+HI(3%06 37";/7d t   6 **41IJ(5%6 &%r9   inds_to_outputc                     | j                   D cg c]  }|j                  dvs| c}t        fd|D              }| j                  |       y c c}w )N)rt   outputc              3   (   K   | ]	  }|     y wr-   r   ).0rY   	node_lists     r(   	<genexpr>z+_create_subgraph_outputs.<locals>.<genexpr>   s     =sIcN=s   )nodesoprB   r~   )r   r|   nout_tupr   s       @r(   _create_subgraph_outputsr      sG     %NNTqadd:S.STI=n==GOOG Us
   AAc                 v    t         j                  j                         }t        ||       }t	        ||       ||fS r-   )r   r   Graphr{   r   )r#   r"   r   node_ind_input_indss       r(   r   r      s8      %xx~~/H6xHX'?@(((r9   )%__doc__loggingrK   collections.abcr   typingr   torch.fxr   torch._higher_order_ops.utilsr   torch.utils._pytreer   graph_region_trackerr	   r
   	getLogger__name__rD   r]   r)   listr:   r   r   rB   intr   strr   rb   r   r^   re   r{   r   r   r   r9   r(   <module>r      s     $   O , . g!=tD$J/? =Js tDz &*#88>>*#*# *# uS#X/	*#
 #3i*# HH  *# *# dDj)*# 
*#Z$$	$c3h
 $$,T&\ ,d3i ,)& )s3x )D )&hhnn&&,&	%S/3
&6hhnn.23i	))"3i) 588>>4c3h 4556)r9   