
    Vh                        d dl Z d dlZd dlZd dlmZmZ d dlmZmZm	Z	m
Z
mZ d dlZd dlmZ d dlmZmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZmZmZ d
dlmZ d
dlm Z m!Z!m"Z"m#Z#m$Z$ d
dl%m&Z&  ejN                  e(      Z)	 	 ddedeejT                  e+f   de
e,ej                        de
e,e,ejZ                  ee   f         de+f
dZ. G d d      Z/dedefdZ0 e jb                  d      dedefd       Z2 G d de&jf                        Z4y)    N)IterableSequence)AnyCallablecastOptionalUnion)Expr)free_unbacked_symbolsShapeEnv)
OrderedSetFloorDivModularIndexing)symbol_is_typeSymT)bound_sympyIntInfinityValueRanges   )is_power_of_2)has_free_symbolssympy_index_symbolsympy_index_symbol_with_prefix
sympy_subs	VarRanges)V	shape_envexpraxiomsvar_to_rangereturnc                     |dv rt        |      S 	 | j                  |||      }|t        |      S 	 y# t        $ r t        j	                  d|d       Y yw xY w)N)TFr    r!   zCould not simplify  %sT)exc_infoF)bool_maybe_evaluate_static	Exceptionlogdebug)r   r   r    r!   
simplifieds        H/home/dcms/DCMS/lib/python3.12/site-packages/torch/_inductor/sizevars.pyevaluate_exprr-      s     }Dz	A55% 6 


 !
## "
   A		*D4	@As    4 !AAc            
       Z    e Zd Zd9d: fdZdefdZdeeegef   fdZd Z	dededefd	Z
d
eej                     fdZdeej                   ef   defdZdeeef   deeef   defdZdee   dee   defdZdedeeef   defdZdedeeef   defdZdedeeef   defdZdedeeef   defdZdedeeef   defdZdedefdZdededefdZdededdfdZdededdfdZd Z	 d;deeej@                  jB                  jD                  f   dedefdZ#dededefdZ$dededefd Z%deeef   defd!Z&de'eeef      dee   fd"Z(dedefd#Z)deeef   deeef   fd$Z*dd%deeef   d&e+e   defd'Z,dd%d(e-e   d&e+e   de.ed)f   fd*Z/d9d+Z0d, Z1d-ed.e'ej                     d/e'ej                     dee   fd0Z2dd%deeef   d&e+e   deeef   fd1Z3d-ed.eej                     defd2Z4	 d9d-ed.e'ej                     d/e+e'ej                        dee   fd3Z5d-ed.eej                     dee   fd4Z6dedefd5Z7de8ej                     fd6Z9d-ej                  dej                  fd7Z:d-ej                  deee.ej                  ej                  f   f   fd8Z; xZ<S )<SizeVarAllocatorNr"   c                 N   t         |           |
t               }|| _        | j                  j                  | _        | j                  j
                  | _        i | _        i | _        | j                         | _	        | j                         | _        | j                         | _        y N)super__init__r   r   
var_to_valreplacementsprecomputed_replacementsinv_precomputed_replacementsmake_stride_vars_cachestride_varsmake_simplify_with_ranges_cachesimplify_with_rangesmake_simplify_loops_cache_simplify_loops)selfr   	__class__s     r,   r3   zSizeVarAllocator.__init__:   s     
I"..336:nn6Q6Q CE%FH)668$($H$H$J!#==?    r   c                 ^    t        j                  |      j                  | j                        S r1   )sympyexpandxreplacer5   r>   r   s     r,   simplifyzSizeVarAllocator.simplifyQ   s"    ||D!**4+<+<==r@   c                 n     i t         j                        dt        dt        dt        f fd}|S )R
        self._simplify_with_ranges() can be expensive, cache its results
        r   
var_rangesr"   c                 8   t        j                        k7  r%j                          t        j                        | g|j                         }j	                  |d       }|3j                  | |      }||<   || k7  r||g|j                         <   |S r1   )lenr5   clearitemsget_simplify_with_ranges)r   rI   keyresultcachereplacement_countr>   s       r,   r;   zNSizeVarAllocator.make_simplify_with_ranges_cache.<locals>.simplify_with_ranges[   s     C(9(9$::$'(9(9$:!-***,-CYYsD)F~33D*E#c
T>;AE67J$4$4$678Mr@   )rK   r5   r
   r   )r>   r;   rR   rS   s   ` @@r,   r:   z0SizeVarAllocator.make_simplify_with_ranges_cacheT   s=     .0 1 12	t 	 	t 	 $#r@   c                 H     i t         j                         fd}|S )rH   c                     t        j                        k7  r%j                          t        j                        g | ||}j                  |d       }|j	                  | ||      }||<   |S r1   )rK   r5   rL   rN   _simplify_loops_impl)
index_varssizesindex_formulasrP   rQ   rR   rS   r>   s        r,   simplify_loopszBSizeVarAllocator.make_simplify_loops_cache.<locals>.simplify_loopss   s{     C(9(9$::$'(9(9$:!8J888CYYsD)F~22:unU#c
Mr@   )rK   r5   )r>   rZ   rR   rS   s   ` @@r,   r<   z*SizeVarAllocator.make_simplify_loops_cachel   s(     -/ 1 12	 r@   rI   c                      t         j                  |            }|}t         j                  j                        }|j                  j                         D ci c]5  \  }}|t        dt        |g      st        d|dz
        n	t                     7 c}}       |j                  D ]  }||vst        dt                     ||<     t        t        t        t        j                  t        t        j                      f      t        |j                                     g j                         D ]-  \  }}j#                  d|k         j#                  ||k         / t               j                  j%                         z    fdfdfd}	fd}
|j'                  t(              r\|j+                  t)        t        j,                  dd	      t        j,                  d
d	      t        j,                  dd	            |
      }|j'                  t.              rF|j+                  t/        t        j,                  dd	      t        j,                  d
d	            |	      }||k7  r j1                  |      S |S c c}}w )zk
        Simplify indexing expression with knowledge of the ranges of
        iteration variables.
        r   r   c                 V    j                   j                  |       }t        |      S )Nr$   )r   r'   r&   )r   	evaluatedr    r>   var_to_range_tuples     r,   statically_knownz@SizeVarAllocator._simplify_with_ranges.<locals>.statically_known   s1    ==/ > I
 	?"r@   c                 0    | dk\        s| S | j                   D ]x  }|v st        j                  d|g      }| j                  ||z         }|s7|||   j                   vsIt        j                  ||   |      }||k(  sh ||k        st||   } z | S )z)Symbols smaller than the divisor are zeror   _rest)exclude)free_symbolsrB   Wildmatchgcd)basedivisorvrestmrf   r_   rI   s         r,   remove_zero_termszASizeVarAllocator._simplify_with_ranges.<locals>.remove_zero_terms   s    #DAI.&& 
/
? !::gs;D

1t8,AQag&:&::#ii$9'>/G<'(w
/ Kr@   c                 *    t         | |      |      S r1   )r   )rg   rh   rl   s     r,   visit_indexing_divzBSizeVarAllocator._simplify_with_ranges.<locals>.visit_indexing_div   s    -dG<gFFr@   c                      | |      }  | dk\        xr  | ||z  k        }|rt        | |      S t        | ||      S Nr   r   )rg   rh   moduluscan_remove_modrl   r_   s       r,   visit_modular_indexingzFSizeVarAllocator._simplify_with_ranges.<locals>.visit_modular_indexing   sX    $T73D-dai8 =Mw((>N g.."4'::r@   rg   Tintegerrh   rq   )join_dimensionsrF   dictr   r!   updaterM   r   r   maxr   rc   r   tuplerB   Symbolr
   append
get_axiomshasr   replacerd   r   rO   )r>   r   rI   original_exprr!   kri   varupper_boundrn   rs   r    rl   r_   r^   s   ` `        @@@@r,   rO   z&SizeVarAllocator._simplify_with_ranges   s7    t}}T23DNN778
 ',,.	 Aq ;,<aS,As1a!e}{} 	
 $$ 	BC,&$/;=$AS!	B "%k%**&==>?,$$&'

  * 0 0 2 	-CMM!s(#MM#+,	- v!:!:!<<	#	$	G		; 88O$<<JJvt4JJy$7JJy$7
 'D 88H<<JJvt4JJy$7 #D = --dJ??gs    :J

rW   c           
          t        t         j                              D cg c]=  }t        |t        j
                        r j                  |      ndgt              z  ? c}t              t        d         k(  sJ t              t        d         f       t        t                    D ]  }|   dk(  sd|<     fd}d}|rd}t        j                  t        t        t                          t        t        t                                D ]4  \  }}||k(  s
|   |    |||      s d}|   |   z  |<   d|<   6 |rfd}	fd}
D cg c]  }||	 c}|	|
fS c c}w c c}w )	a  
        Try to remove as many axis from loop iterations as possible, by:
            1) removing size==1 dimensions
            2) fuse contiguous dimensions into a single loop
            If channel_last = True, we will prevent the last dim fused with other dims
        r   r   Nc           	         t        t                    D ]  }j                  |   |    |    z        j                  |   |         k(  rs
|    }
|   }t        d      }t        d      }t	        	|   |||    z  ||i      }t	        	|   |d|||z   i      }j                  |      j                  |      k(  r y y)N_merge_tester1_merge_tester2r   FT)rangerK   rF   r   r   )abr   vavbm1m2expr1expr2rY   rW   r>   rX   stridess            r,   can_merge_dimsz=SizeVarAllocator._simplify_loops_impl.<locals>.can_merge_dims   s    3w<( ==Aq!9:dmmAJqM?  $AB#AB+,<=B+,<=B '~a'82rE!H}bRT:UVE&~a'82q"rBw:PQE}}U+t}}U/CC   r@   TFc                     t        t        |             }g }D ]M  }|*|j                  t        j                  j
                         /|j                  |j                                O |rJ |S r1   )listreversedr|   rB   SZeropop)indexit	new_indexsizerX   s       r,   reindexz6SizeVarAllocator._simplify_loops_impl.<locals>.reindex   sc    huo&BI /<$$UWW\\2$$RVVX.	/
 M6r@   c                     t        |       t              k(  sJ t        |       D cg c]
  \  }}|	| c}}S c c}}w r1   )rK   zip)r   isrX   s      r,   prunez4SizeVarAllocator._simplify_loops_impl.<locals>.prune+  s<    u:U+++"%eU"3E$!Qq}AEEEs   
==)r   maprF   
isinstancerB   r
   r9   rK   r   	itertoolsproductr   )r>   rW   rX   rY   xr   r   changedjr   r   r   s   ````       @r,   rV   z%SizeVarAllocator._simplify_loops_impl   s    S./ $
  a,   J/S3z?*+
 5zS_,Ks5z3wqz?.KK,s5z" 	 AQx1}a	 
	 	& G!))s5z*+XeCJ6G-H $1 6U1X-q1A!!Q'"G$Qx%(2E!H#E!H$ 			F !2aAM2GUBBE
D 3s   AF0F8Fc                 .    t        | j                  |      S r1   )r-   r   rE   s     r,   is_expr_static_and_truez(SizeVarAllocator.is_expr_static_and_trueK  s    T^^T22r@   leftrightc                 L    | j                  t        j                  ||            S )zf
        Returns a bool indicating if it is sound to optimize as if left and right are equal.
        )r   rB   Eqr>   r   r   s      r,   statically_known_equalsz(SizeVarAllocator.statically_known_equalsN  s      ++EHHT5,ABBr@   c                 r     t        |      t        |      k(  xr t         fdt        ||      D              S )zl
        Returns a bool indicating if it is sound to optimize as if left and right lists are equal.
        c              3   H   K   | ]  \  }}j                  ||        y wr1   )r   ).0lrr>   s      r,   	<genexpr>z@SizeVarAllocator.statically_known_list_equals.<locals>.<genexpr>[  s'      /
371aD((A./
s   ")rK   allr   r   s   `  r,   statically_known_list_equalsz-SizeVarAllocator.statically_known_list_equalsW  s:     4yCJ& 
3 /
;>tU;K/
 ,
 	
r@   c                 .    ||k  }| j                  |      S )zq
        Returns a bool indicating if it is sound to optimize as if left is less than or equal to right.
        r   r>   r   r   r   s       r,   statically_known_leqz%SizeVarAllocator.statically_known_leq`       u}++D11r@   c                 .    ||k\  }| j                  |      S )zt
        Returns a bool indicating if it is sound to optimize as if left is greater than or equal to right.
        r   r   s       r,   statically_known_geqz%SizeVarAllocator.statically_known_geqh  r   r@   c                 .    ||k  }| j                  |      S )ze
        Returns a bool indicating if it is sound to optimize as if left is less than right.
        r   r   s       r,   statically_known_ltz$SizeVarAllocator.statically_known_ltp       e|++D11r@   c                 .    ||kD  }| j                  |      S )zh
        Returns a bool indicating if it is sound to optimize as if left is greater than right.
        r   r   s       r,   statically_known_gtz$SizeVarAllocator.statically_known_gtx  r   r@   	numeratordenominatorc                     t        |      st        |      ryt        j                  ||z  d      }| j                  |      S )z|
        Return a bool indicating if it is sound to optimize for the numerator being a multiple of the denominator.
        Fr   )r   rB   r   r   )r>   r   r   r   s       r,   statically_known_multiple_ofz-SizeVarAllocator.statically_known_multiple_of  s=     !+/D[/Qxx	K/3++D11r@   c                 b    t        |t        j                        xr t        t	        |            S )zM
        Returns a bool indicating if x is known to be a power of 2.
        )r   rB   Integerr   intrE   s     r,   statically_known_power_of_2z,SizeVarAllocator.statically_known_power_of_2  s#     $.K=T3KKr@   c                 ^   t        |t              rt        || j                        }t        |t              rt        || j                        }t	        j
                  ||      }| j                  j                  |      }|t        |      sJ |S | j                  j                  |d      sJ |S )Nguard_equals)
r   r
   r   r7   rB   r   r   r'   r&   defer_runtime_assertr>   r   r   r   static_exprs        r,   r   zSizeVarAllocator.guard_equals  s    dD!dD$E$EFDeT"ud&G&GHExxe$nn;;DA"$$$K~~224HHHr@   c                 ,    | j                  ||dz         S )Nr   )guard_ltr   s      r,   	guard_leqzSizeVarAllocator.guard_leq  s    }}T519--r@   c                     t        j                  ||      }| j                  j                  |      }|t	        |      sJ y | j                  j                  |d      sJ y )Nr   )rB   Ltr   r'   r&   r   r   s        r,   r   zSizeVarAllocator.guard_lt  sW    xxe$nn;;DA"$$$~~224DDDr@   c                 F   g t        | j                  |      }t        |      D cg c]  \  }}| j                  |      ||f }}}|j	                          dgt        |      z  }d}t        |      D ]$  \  }\  }}}|||<   || j                  ||       |}& |S c c}}w )zz
        Return the order of a sequence as a permutation of range(len(seq)) and guard on that order not changing.
        N)r   remove_precomputed_replacements	enumerate	size_hintsortrK   r   )	r>   seqorig_idxr   orderlast_varr   _
orig_indexs	            r,   guarded_orderzSizeVarAllocator.guarded_order  s     @D88#>?IRSVX#s#Xs3XX
s3x/8~ 	+I+:s )E*#x-H		
  Ys   Bsize_obliviousc                     t        |t        t        j                  j                  j
                  f      sJ t        |             | j                  j                  t        j                  |      |      S )N)r   )
r   r
   rB   logicboolalgBooleantyper   r-   sympify)r>   r   r   s      r,   r-   zSizeVarAllocator.evaluate_expr  s\    
 $u{{':':'B'B CDPd4jPD~~++MM$ , 
 	
r@   c                 ,   t        |t              rt        || j                        }t        |t              rt        || j                        }	 | j	                  |      }| j	                  |      }||k  r| j                  ||       |S | j                  ||       |S # t
        $ rm ||k(  s| j                  ||      r|cY S | j                  ||      r|cY S t        j                  ||      }||k(  r|cY S ||k(  r|cY S t        d| d| d      dw xY w)z>return the smaller of left and right, and guard on that choicezevaluate_min(z, z) with unbacked symintsN)
r   r
   r   r7   r   	TypeErrorr   rB   rf   r   )r>   r   r   lvrvrf   s         r,   evaluate_minzSizeVarAllocator.evaluate_min  s   dD!dD$E$EFDeT"ud&G&GHE	%B&B 8NN4'KNN5$'L%  	u} 9 9$ F((5))D%(Cs{|vRw.EF	s$   "B "DDD6D?Dc                 6    | j                  ||      }||u r|S |S )z=return the larger of left and right, and guard on that choice)r   )r>   r   r   min_vals       r,   evaluate_maxzSizeVarAllocator.evaluate_max  s'     ##D%04u1T1r@   c                     t        |t              r|S | j                  |      }| j                  |t	        j
                  |             t        |      S r1   )r   r   r   r   rB   r   r   s      r,   evaluate_static_shapez&SizeVarAllocator.evaluate_static_shape  sB    dC Kt$$e 455zr@   c                 J    |D cg c]  }| j                  |       c}S c c}w r1   )r   )r>   r   r   s      r,   evaluate_static_shapesz'SizeVarAllocator.evaluate_static_shapes  s!    7;<!**1-<<<s    c                 j    t        d |j                  D              rt        || j                        S |S )Nc              3   P   K   | ]  }t        |t        j                           y wr1   )r   r   PRECOMPUTED_SIZE)r   r   s     r,   r   zCSizeVarAllocator.remove_precomputed_replacements.<locals>.<genexpr>  s     SA~a!6!67Ss   $&)anyrc   r   r7   rE   s     r,   r   z0SizeVarAllocator.remove_precomputed_replacements  s.    SARARSSdD$E$EFFr@   c                 4   t        |t              r|S | j                  |      }t        |t              st        |t              sJ |S |j                  }|s	 t        |      S | j                  |      }t        || j                        S # t
        $ r |cY S w xY wr1   )	r   r   rF   r
   rc   r   r   r   r4   )r>   r   rc   s      r,   symbolic_hintzSizeVarAllocator.symbolic_hint	  s    dC K}}T"$%dC(((K((4y  33D9$00  s   
B	 	BBfallbackr   c                   | j                  |      }t        |t        t        j                  f      s ||j
                  D ci c])  }|| j                  j                  j                  |d       + }}t        d |j                         D              rt        ||      }t        |j                  t        t        j                  f      rt        |t        |j                              }t        |j                  t        t        j                  f      rt        |t        |j                              }|S 	 t        |      S c c}w # t         $ r t"        j%                  d|        w xY w)Nc              3   $   K   | ]  }|d u 
 y wr1    )r   vrs     r,   r   z-SizeVarAllocator.size_hint.<locals>.<genexpr>#  s     Fb2T>Fs   zfailed on: %s)r   r   r   rB   r   rc   r   r!   rN   r   valuesr   lowerry   upperminr(   r)   r*   )r>   r   r   outr   unbacked_sym_vrshint_vrs          r,   r   zSizeVarAllocator.size_hint  s&      &#U]]349M FIEUEU @A4>>..221d;;    F,<,C,C,EFF%c+;<gmmc5==-AB"8S-?@Hgmmc5==-AB"8S-?@HO	s8O   	IIos+	s   .D?4
E !E%exprs.c                0     t         fd|D              S )Nc              3   D   K   | ]  }j                  |         yw)r   N)r   )r   r   r   r>   s     r,   r   z.SizeVarAllocator.size_hints.<locals>.<genexpr>7  s     IaT^^A^9Is    rz   )r>   r  r   s   ` `r,   
size_hintszSizeVarAllocator.size_hints1  s     I5IIIr@   c                       t        j                  |      |      t         j                        t        j                  |       fd       }|S )zp
        Wrapper around functools.lru_cache that clears when replacements
        has been invalidated.
        c                      t        j                        k7  r%t        j                        j                           | i |S r1   )rK   r5   cache_clear)argskwargsfn_cache	prior_lenr>   s     r,   wrapperz,SizeVarAllocator._lru_cache.<locals>.wrapperA  sD     C 1 122 1 12	$$&T,V,,r@   )	functools	lru_cacherK   r5   wraps)r>   fnmaxsizer  r  r  s   `   @@r,   
_lru_cachezSizeVarAllocator._lru_cache9  sN    
 09&&w/3))*				- 
	- r@   c           
          | j                  | j                        	 ddt        dt        t        j
                     dt        t        t        j
                        dt        t           ffd}|S )Nr   varssupport_varsr"   c                 D    |s|} | t        |      t        |            S r1   r  )r   r  r   rR   s      r,   r9   z<SizeVarAllocator.make_stride_vars_cache.<locals>.stride_varsN  s%    
  #dU<-@AAr@   r1   )r  _stride_varsr
   r   rB   r{   r   r   )r>   r9   rR   s     @r,   r8   z'SizeVarAllocator.make_stride_vars_cacheK  sn     1 12
 >B	B	B5<<(	B #8ELL#9:	B $Z		B r@   r   r  r   c           
         g }| j                  |      }|t        ||D ci c]#  }|dk7  s	|t        j                  j                  % c}      z
  }t        t        |            D ]  }t        |t        t        |            D ci c]3  }||   ||   k7  r&||   dk7  r||   t        j                  j                  5 c}      }||   }|dk(  r*|j                  t        j                  j                         |j                  t        ||t        j                  j                  i      t        ||t        j                  j                  i      z
          |S c c}w c c}w )a  Convert an indexing expression back into strides

        NOTE: This is only valid if the index is a standard strided offset
        calculation. e.g. 10 * ModularIndexing(i0 + 1, 1, 2) would give a
        stride of -10 because the index wraps around after the first element

        r   )	rF   r   rB   r   r   r   rK   r|   One)	r>   r   r  r   r   ri   r   r   	index_dims	            r,   r"  zSizeVarAllocator._stride_varsY  s8    e$
\DQ!VAuww||OD
 
 s4y! 	A" #3|#45Aw,q/1l1o6J !OUWW\\1I QAAvuww||, y1eggkk*:; Q,=>?	& + Es   
EE8Ec                .   t        |t              rt        |      S t        |t              sJ t        |             |j                  }|D ci c].  }|t
        j                  j                  j                  ||      0 }}|j                  |      S c c}w )Nr   )
r   r   r
   r   rc   r   graphsizevarsr   subs)r>   r   r   rc   symbol	size_dicts         r,   atomically_apply_size_hintz+SizeVarAllocator.atomically_apply_size_hint  s     dC t9 $%1tDz1%(( '
 AGG$$..v.II
	 
 yy##	
s   3Bc                     | j                  |      }t        ||D ci c]#  }|dk7  s	|t        j                  j                  % c}      S c c}w )z-Extract offset part of an indexing expressionr   )rF   r   rB   r   r   )r>   r   r  ri   s       r,   
offset_varzSizeVarAllocator.offset_var  s=    e$%4!Ja16!UWW\\/!JKK!Js
   
A
A
c                 0   |j                   D ]+  }t        |t        j                        st	        ||di      }- g }| j                  |||      D ]#  }	 |j                  | j                  |             % |S # t        $ r |j                  d       Y Dw xY wrp   )	rc   r   r   INDIRECTr   r9   r|   r   r   )r>   r   r  r   ri   rQ   r   s          r,   stride_hintszSizeVarAllocator.stride_hints  s     ## 	2Aa/"51a&1	2 !!%|< 	!A!dnnQ/0	!
   !a !s    A88BBc           	          t        t        t        | j                  ||                  t	        t        t                          }|j                  fd       |S )Nc                     |    dk(  |    fS rp   r  )r   r   s    r,   <lambda>z/SizeVarAllocator.stride_order.<locals>.<lambda>  s    '!*/71:!> r@   )rP   )rz   r   absr1  r   r   rK   r   )r>   r   r  r   r   s       @r,   stride_orderzSizeVarAllocator.stride_order  sH    C!2!25$!?@AU3w<()

>
?r@   c                    t        |t        t        j                  t        j                  f      s|j
                  s|j                  r|S | j                  |      }|| j                  vrKt        t        j                  t        | j                              }|| j                  |<   || j                  |<   | j                  |   S r1   )r   r   rB   r{   Number	is_number	is_symbolr   r6   r   r   r   rK   r7   )r>   r   syms      r,   lookup_precomputed_sizez(SizeVarAllocator.lookup_precomputed_size  s    tc5<<>?~~~~K33D9t4440%%s4+H+H'IC 36D))$/59D--c2,,T22r@   c                     t        | j                  j                               t        | j                  j                               z
  S r1   )r   r4   keysr5   )r>   s    r,   rc   zSizeVarAllocator.free_symbols  s3    $//..01Jt?P?P?U?U?W4XXXr@   c                       fd}t        |t              rQ|j                  \  }}} ||||d      s|S |j                  \  }}} ||||d      s|S ||z  dk7  r|S t        |d|      S |S )a-  
        A pair of special ModularIndexing can be combined.

        E.g. ModularIndexing(ModularIndexing(x, 1, a), 1, b)
        We can simplify this to ModuleIndexing(x, 1, b), if
        1. x is non negative integer
        2. a and b are positive integers
        3. a is a multiple of b.
        c                 
   t        |t        j                        rt        |t        j                        sy|dk7  ry|dk  ry|rt        | t              syyt        | t        j                        rj                  | d      syy)NFr   r   T)r   rB   r   r   r{   r   )r   divmodis_firstr>   s       r,   _check_argszDSizeVarAllocator.combine_modular_indexing_pairs.<locals>._check_args  st    c5==1C9Waxax!!_5  	 "!U\\2$:S:Sq; !r@   TFr   r   )r   r   r  )	r>   r   rD  r   rA  rB  x2div2mod2s	   `        r,   combine_modular_indexing_pairsz/SizeVarAllocator.combine_modular_indexing_pairs  s|    	* e_-**KAsCq#sD1VVNBdr4u5czQ"2q#..r@   c                    t        |t        j                        sy|j                  }t	        |      dk  ryd}g }g }t        |      D ]L  \  }}t        |t        j                        rt	        |j                        dk7  r y|j                  \  }}	|j                  |	       |j                  |       t        |t        j                        rt        |	t        j                        s y| j                  |	d      r yt        |t              r|j                  \  }	}t        |t        j                        rt        |	t        j                        s y| j                  |	d      s y|dk\  r y|}|j                  |	       |j                  |       M y |dk  ry||   }
t        j                  j                  }t        ||t        j                                D ]  \  }	}}||k(  r||	z  }|||
z  |	z  z  } ||
fS )a  
        Expand the FloorDiv to the entire expression so that the expression may
        be simplfied.

        E.g., for a 2D contiguous tensor with shape [a, 2 * b], and index variables
        x1, x2, index expression 'x1 * 2b + x2' can be easily combined.
        But index expression 'x1 * b + x2 // 2' can not.
        By expanding the FloorDiv to the entire expression, we get
        '(x1 * 2b + x2) // 2'. This transformation allows us to merge loops
        for the numerator!

        Return false if this optimization can be applied;
        Return the new expression and the denominator otherwise.
        The original expression will be equivalent to 'new_expression // denominator'
        F   r   r   )r   rB   Addr  rK   r   Mulr|   r   r{   r   r   r   r   r   r   count)r>   r   termsfloor_div_indexvarlist
factorlistidxtermfactorr   r   r   s               r,   expand_floor_divz!SizeVarAllocator.expand_floor_div  s   $ %+

u:>
"5) %	IC$		* tyy>Q& "iis#!!&)!&%--8
A ! 00a8 D(+"iiV!&%--8
A !00a8 "a' "%s#!!&)K%	N Q !1GGLL	 #GZ9J K 	:Co%S 	f{2c99			: +%%r@   r1   )r"   N)F)=__name__
__module____qualname__r3   r
   rF   r   r   r:   r<   rO   r   rB   r{   rV   r	   Basicr&   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r-   r   r   r   r   r   r   r   r   r   r   rz   r  r  r8   r"  r,  r.  r1  r6  r<  r   rc   rH  rU  __classcell__r?   s   @r,   r/   r/   9   s   @.>T >$4:KT:Q1R $0,^$ ^I ^$ ^@MCu||,MCR3E%++t2C,D 3 3C$)$C-249-=C	C
d 
DJ 
SW 
2 2eD#I6F 24 22 2eD#I6F 24 22 2U495E 2$ 22 2U495E 2$ 2	2	2,1$),<	2		2L L L d t  .d .4 .D .ET E$ E4 E6  %
D%++--5556
 
 
	
 d t :2 2d 2t 2%c	*: s =8E$)4D+E =$s) =D T 
1%c	"2 1uT3Y7G 1$ DH$)$3;C=	6 #'	J~J 3-	J
 
sCxJ$&& u||$& u||,	&
 
d&R DH$$)$$3;C=$	tSy	$$L LD,> L4 L :>	 u||$ x56	
 
c"$ d5<<.@ T#Y 3D 3T 3 Yj6 Y0EJJ 05:: 0dO&ZZO&	tU5::uzz122	3O&r@   r/   c                 z    t        | t        j                        r| j                  t              s| S t        |       S r1   )r   rB   rK  r~   r   _join_dimensions_cached)r   s    r,   rv   rv   F  s+    dEII&dhh.G"4((r@      c                    t        | t        j                        sJ t        j                  ddgd      }t        j                  dd      }t        j                  dd      }t        j                  dd      }t        j                  d	d      }| j                  D ]  }|j                  |t        |||      z        }|s%| j                  D ]x  }|j                  ||   ||   z  t        ||   ||   ||   z  |      z        }	|	s:||k7  s@t        | |z
  |z
  ||   t        ||   ||   ||   |	|   z        z  z         } | c c S   | j                  D ]  }|j                  |t        |||      z        }|s%| j                  D ]g  }|j                  ||   ||   z  t        ||   ||   ||   z        z        }	|	9t        | |z
  |z
  ||   t        ||   ||         z  z         } | c c S   | S )
z
    ModularIndexing(i0, 1, 32) + 32 * ModularIndexing(i0, 32, 4)
    becomes
    ModularIndexing(i0, 1, 128)
    ModularIndexing(i0, 1, 32) + 32 * FloorDiv(i0, 32)
    becomes i0


    This type of pattern can come from view operations
    scaler   T)rb   ru   rg   rt   rh   rq   modulus2)	r   rB   rK  rd   r  re   r   rv   r   )
r   r`  rg   rh   mod1rG  term1r   term2r   s
             r,   r]  r]  L  s1    dEII&&&JJwT:E::fd+DjjD1G::i.D::j$/D  [[w!EEF  [[uIh%bh7bh0FMN
 %5.*   U))"T(BwKDBtHATUVVD  K  $   [[w!EEF  [[uI4(8BtHbkBtH>T+UU >*   U)hr$xG&EEFD  K   Kr@   c                   j     e Zd ZdZdeddf fdZdedej                  fdZ	dd	Z
d
 Zd Zd Z xZS )SimplifyIndexingzt
    A wrapper around .virtualize.ops that uses var range information to
    simplify ModularIndexing/FloorDiv.
    rI   r"   Nc                 H    t         |   |       d| _        fd| _        y )Nrf  c                 X    t         j                  j                  j                  |       S r1   )r   r'  r(  r;   )r   rI   s    r,   r4  z+SimplifyIndexing.__init__.<locals>.<lambda>  s    !''**??zR r@   )r2   r3   name	_simplify)r>   innerrI   r?   s     `r,   r3   zSimplifyIndexing.__init__  s!    &	R 	r@   ri  r   c                 X    | j                   j                  || j                  |            S r1   )_innerloadrj  )r>   ri  r   s      r,   rn  zSimplifyIndexing.load  s"    {{dnnU&;<<r@   c                 ^    | j                   j                  || j                  |      ||      S )N)mode)rm  storerj  )r>   ri  r   valuerp  s        r,   rq  zSimplifyIndexing.store  s)    {{  t~~e'<e$ OOr@   c                 Z    | j                   j                  || j                  |      |      S r1   )rm  store_reductionrj  )r>   ri  r   rr  s       r,   rt  z SimplifyIndexing.store_reduction  s$    {{**41FNNr@   c                 X    | j                   j                  | j                  |      |      S r1   )rm  
index_exprrj  )r>   r   dtypes      r,   rv  zSimplifyIndexing.index_expr  s"    {{%%dnnU&;UCCr@   c                 \    | j                   j                  | j                  |      |||      S r1   )rm  check_boundsrj  )r>   r   r   r  r  s        r,   ry  zSimplifyIndexing.check_bounds  s&    {{''u(=tUERRr@   r1   )rV  rW  rX  __doc__r   r3   strrB   r
   rn  rq  rt  rv  ry  rZ  r[  s   @r,   rf  rf    sJ    

) 
 
= =UZZ =PODSr@   rf  )NN)5r  r   loggingcollections.abcr   r   typingr   r   r   r   r	   rB   r
   %torch.fx.experimental.symbolic_shapesr   r   torch.utils._ordered_setr   torch.utils._sympy.functionsr   r   torch.utils._sympy.symbolr   r   torch.utils._sympy.value_rangesr   r   r   runtime.runtime_utilsr   utilsr   r   r   r   r   virtualizedr   	getLoggerrV  r)   rY  r&   rz   r{   r-   r/   rv   r  r]  WrapperHandlerrf  r  r@   r,   <module>r     s-      . 7 7   Q / B : Q Q 0   g! +/KO	
T!
" U5::&' 5u||[5E'E!FGH	
 
4J& J&Z)$ )4 ) S3$ 34 3 3lSq'' Sr@   