
    VhS?                        d dl Z d dlZd dlmZmZ d dlZd dlmZ d dl	Zd dl
mc mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZmZm Z  d dl!m"Z" ejF                  jH                  jJ                  Z%d	 Z&d
e'e   de(fdZ)d dddeejT                  ejT                  ge+ejT                  ejT                  f   f   dejT                  dejT                  de(de,de+ejT                  ejT                  f   fdZ- G d de      Z. e.       Z/d%dZ0dejb                  de(dejb                  fdZ2dede'ejb                     de'ejb                     de+ejb                     fdZ3e/ji                  ejj                        d        Z6 e/ji                  ejn                         ee/d              e/ji                  e      d!        Z8e/ji                  e      d"        Z9e/jt                  d#        Z;d&d$Z<y)'    N)AnyCallable)DispatchKey)	!_has_potential_branch_input_alias$_has_potential_branch_input_mutation_maybe_compile_and_run_fnautograd_not_implementedfirst_slice_copyreenter_make_fxunique_graph_id!UnsupportedAliasMutationExceptionvalidate_subgraph_args_types)HigherOrderOperator)FakeTensorMode)disable_proxy_modes_tracingProxyTorchDispatchModetrack_tensor_tree)_get_current_dispatch_modec                     t        |      ||z   k(  sJ d||z    dt        |              t        j                  |d | |      }t        j                  ||d  |      } | ||      S )Nz7Combin_fn received wrong number of arguments, expected z
, but got )lenpytreetree_unflatten)
combine_fn	spec_initspec_xsnum_init_leavesnum_inp_leavesargscarryxss           L/home/dcms/DCMS/lib/python3.12/site-packages/torch/_higher_order_ops/scan.pywrap_combine_fn_flatr"   #   s     t9.( y	@SaAa@bblmpqumvlwxy  !!$'7"8)DE			tO$45w	?BeR      flat_out	num_carryc                     | d | | |d  fS N )r$   r%   s     r!   _extract_carry_and_outr)   .   s    JY)*!555r#   F)dimreverser   initr    r*   r+   returnc          
      f   t        j                  |      \  }}t        j                  |      \  }}t        |      dk(  r|g fS d }	|d   j                  }
t	        j
                  |
|      } |	| ||||       g }|D ](  }|j                  t        j                  ||d             * |r$|D cg c]  }t        j                  |dg       }}t        j                  t        | ||t        |      t        |            } d }t        || ||      \  }}|rt        j                  d |      }||fS c c}w )a  
    Performs an inclusive scan with a combine function.

    .. warning::
        `torch.scan` is a prototype feature in PyTorch. It currently
        does not support autograd and you may run into miscompiles.
        Read more about feature classification at:
        https://pytorch.org/blog/pytorch-feature-classification-changes/#prototype

    Args:
        combine_fn (Callable): A binary callable with type ``(Tensor, Tensor) -> (Tensor, Tensor)``,
            or if xs is a pytree ``(pytree, pytree) -> (pytree, pytree)``.
            The first input to ``combine_fn`` is the previous or initial scan carry
            and the second input element to ``combine_fn`` is a slice of the input along dim.
            The first output element of ``combine_fn`` is the next scan carry
            and the second output  of ``combine_fn`` represents a slice of the output.
            This function must be pure, i.e., no lifted arguments are supported at the moment
            and may not have any side effects.
        init (torch.Tensor or pytree with tensor leaves): The inital scan carry, a tensor, or nested pytree of tensors.
            The ``init`` is expected to have the same pytree structure as the first output element (i.e. carry)
            of ``combine_fn``.
        xs (torch.Tensor or pytree with tensor leaves): The input tensor, or nested pytree of tensors.

    Kwargs:
        dim (int): the dimension to scan over, default 0.
        reverse (bool): A boolean stating if the scan should be reversed with respect to ``dim``, default ``False``.

    Returns:
        final_carry (torch.Tensor or pytree with tensor leaves),
            the final carry of the scan operation with same pytree structure as init.
        out (torch.Tensor or pytree with tensor leaves),
            each tensor leaf is a stacked output along first dim, where each slice is the output of a scan iteration.

    Example::

        def add(x: torch.Tensor, y: torch.Tensor):
            next_carry = y = x + y
            return next_carry, y

        i0 = torch.zeros(1)
        xs = torch.arange(5)
        # returns torch.tensor([10.]), torch.tensor([[0], [1.], [3.], [6.], [10.]])
        last_carry, cumsum = scan(add, init=i0, xs=xs)


    r   c                    t        |       st        d      t        |t              s t        dt	        t        |            z         t        |t              s t        dt	        t        |            z         t        |      dk(  rt        d      |D ]*  }t        |t        j                        rt        d|        |D ]*  }t        |t        j                        rt        d|        y )Nz,Combine_fn must be a callable, but got {cfn}zDim must be an int, but got z Reverse must be a bool, but got r   z%scan() operator requires init leaves.z)All init leaves must be a Tensor but got z'All xs leaves must be a Tensor but got )
callableRuntimeError
isinstanceintstrtypeboolr   torchTensor)cfnlxslinitdrxs         r!   _validate_inputzscan.<locals>._validate_inputt   s    }MNN!S!=DGLMM!T"ACQLPQQ u:?FGG 	TAa."%Nqc#RSS	T
  	RAa."%LQC#PQQ	Rr#   )r   r   r   r   r   c                      t        | ||d      S )Nr(   additional_inputs)scan_op)r   leaves_init	leaves_xss      r!   run_flattened_scanz scan.<locals>.run_flattened_scan   s    z;	RPPr#   c                 &    | j                  dg      S Nr   )flip)elems    r!   <lambda>zscan.<locals>.<lambda>   s    499aS> r#   )r   tree_flattenr   ndimutilscanonicalize_dimappendr7   movedimrI   	functoolspartialr"   r   tree_map)r   r,   r    r*   r+   rD   r   leaves_xs_origr   r?   rM   rE   rJ   rF   r   outs                   r!   scanrW   2   sL   v $006K$11"5NG >aRxR* !!!D

 
 s
+CJS'J I 6tS!456 7@AtUZZqc*A	A ""K(9~JQ +	JE3 oo93?#:9 Bs   ,D.c                   (     e Zd Z fdZ fdZ xZS )ScanOpc                 $    t         |   d       y )NrW   )super__init__)self	__class__s    r!   r\   zScanOp.__init__   s     r#   c                     t        |t        t        f      sJ d       t        |t              rt        |      n|}t        |       t        |   ||||      S )Nz"additional_inputs must be a tuple.)r2   tuplelistr   r[   __call__)r]   r   r,   r    rB   r^   s        r!   rb   zScanOp.__call__   sj    
 t}
 	0/	0 

 +T2 #$" 	
 	%%67w
D"6GHHr#   )__name__
__module____qualname__r\   rb   __classcell__)r^   s   @r!   rY   rY      s    !I Ir#   rY   c                 8      fdfd} |||      }|S )Nc                  4    t        j                   |        S r'   )r   tree_leaves)r   operators    r!   call_operatorz#generic_scan.<locals>.call_operator   s    !!(D/22r#   c                    | }t        |      dk(  r|g fS |d   j                     }d}t        |       }t         g ||D cg c]  }t        |       c} |      \  }}t	        t        |      D 	
cg c]  \  }	}
t        j                  |gt        |
j                               z   |
j                  |
j                        t        j                  |
t        j                        j                  d      g c}
}	 \  fd}t        |      D ]D  }	|	}t         g ||D cg c]  }|j!                  |       c} |      \  }} |||       F g |t              S c c}w c c}
}	w c c}w )z*Perform scan on `elems` using `elems_init.r   )dtypedevice)rm   c                 |    t        |       D ]+  \  }}}|j                  d||z  |j                  d             - y rH   )zipscatter_	unsqueeze)rV   indor>   idxidxsoutss        r!   store_out_in_outsz6generic_scan.<locals>._scan.<locals>.store_out_in_outs   s?     sD1 9	1c
 

1cCiQ89r#   )r   shaper)   r
   rp   	enumerater7   zerosra   sizerm   rn   	ones_likeint64rr   rangeselect)r,   r    r   	num_elemsrs   r   rJ   dummy_carry	dummy_outierx   rV   rv   rw   rB   rk   r*   s                @@r!   _scanzgeneric_scan.<locals>._scan   s   r7a<"9qEKK$	 d)!7 :<=$"4-= #
 "
Y  &i0
 Aq KK"d1668n4gg xx
 OOAU[[9CCAF


d	9 y! 	(AC/ 8:;dkk#s+; '
  JE3 c3'	( %$d$$a >
6 <s   E/;BE4
,E:r(   )rj   r,   r    r*   rB   r   scansrk   s   `  ``  @r!   generic_scanr      s     3>%@ $OELr#   yscan_lengthc                      | j                  d      j                  |gdg| j                  z  z    j                  t        j
                        S )Nr      )memory_format)rr   repeatrM   cloner7   contiguous_format)r   r   s     r!   stack_yr     sE    	A	;-1#,.	1	U44	5r#   rB   c                    ddl m} t               5  |D cg c]
  } ||       }}|D 	cg c]  }	t        |	       }
}	|D 	cg c]&  }	t	        |	t
        j                        r ||	      n|	( }}	 t        |      g ||
| }d d d        d }j                  j                  D ]?  }|j                  dk(  s|J t        |j                        dk(  sJ |j                  d   }A |J t        |t        |            \  }}t        ||      D ]  \  }}|}|j                  d   }|j                  d   }|j                   |j                   k7  s3|j"                  |j"                  k7  s|j$                  |j$                  k7  srt'        d| dd	| z          t)        | d
      \  }}| j*                  j,                  j/                  ||       ||||f}t1        j2                  | j*                  j4                  |      }| j*                  j7                  d||i d      }t               5  |d   j$                  d   t        |D cg c]  }|j                  d    c}t        |            \  }}g |fd|D        }d d d        t9        |d | j*                        S c c}w c c}	w c c}	w # 1 sw Y   *xY wc c}w # 1 sw Y   CxY w)Nr   )clone_inputoutputr   tensor_metavalz+Expected metadata of the combine_fn result z to be the same as zthe metadata of init with scan_combine_graph)prefixcall_functionrW   )namec              3   6   K   | ]  }t        |        y wr'   r   .0tr   s     r!   	<genexpr>ztrace_scan.<locals>.<genexpr>]  s     <!ga%<   )constanttracer)torch._dynamo.utilsr   r   r
   r2   r7   r8   r   graphnodesopr   r   r)   rp   metarn   rm   ry   r1   r   r   rootregister_moduler   rT   unwrap_proxycreate_proxyr   ) 
proxy_modefunc_overloadr   r,   r    rB   r   x_initsample_initsr>   sample_inputssample_additional_inputscombine_graphoutputsnoder   r   inicaini_meta
carry_meta	carry_val_combine_graph_namer   
proxy_args	out_proxyrt   
fake_carryfake_outputsrV   r   s                                   @r!   
trace_scanr     s    0	$	& 	
:>?F+??689)!,99 '$
 )ELL9KNq@$
  $
 4
3 

)
,D
	
 G##)) #77h?"?tyy>Q&&&iilG	# *7CI>ME6tU# RWW]+
GGEN	/8>>18>>1=j\I\].xj9:  ,J?STA**+=}M4%67D!2!2!?!?FJ!!..
BV / I 
%	& 
ekk!n#9$+,qQVVE],c$i$
 
L

<|<

 S)d:CTCTUUo @9$
	
 	
b -
 
sO   J.JJ.J$J.+J)/J.(K J;
$K J..J8;K  K	c                 F    t               }|J d       t        | |||      S )Nz-Mode should never be enabled for CPU/CUDA keyrA   )r   r   )r   r,   r    rB   modes        r!   scan_op_denser   c  s,    %'D<HHH<
D"@QRRr#   T)deferred_errorc                 *    t        | t        ||||      S r'   )r   rC   )r   r   r,   r    rB   s        r!   scan_proxy_moder   o  s    dGZr;LMMr#   c                    	 | 5  |d   j                   d   	t         |g ||D cg c]  }t        |       c}| t        |            \  }}g |	fd|D        }|cd d d        S c c}w # 1 sw Y   y xY w)Nr   c              3   6   K   | ]  }t        |        y wr'   r   r   s     r!   r   z(scan_fake_tensor_mode.<locals>.<genexpr>  s     7!ga%7r   )ry   r)   r
   r   )
r   r   r,   r    rB   inpr   r   rV   r   s
            @r!   scan_fake_tensor_moder   t  s    	 ekk!n/ 356C"3'6 #
 I
w

7w7
  
 7 s   !A0A+*A0+A00A9c                 8   | j                  |      }| j                  |      }| j                  |      }| j                         5  | j                  |      }t        | d      xr | j                  j
                  }	|D 
cg c]  }
t        |
       }}
t        t        j                  |||            }t        |||	      rt        d      t        |||	      rt        d      t        ||||      }d d d        | j                        S c c}
w # 1 sw Y   xY w)Nr   )pre_dispatchz(Combine_fn might be modifying the input!z'Combine_fn might be aliasing the input!)unwrap_tensorsredispatch_to_nextfunctionalizehasattrr   r   r
   ra   	itertoolschainr   r   r   rC   wrap_tensors)ctxr   r,   r    rB   unwrapped_xsunwrapped_initunwrapped_additional_inputsfunctional_combine_fnr   r   sample_unwrapped_xs_slicedr   rets                 r!   scan_functionalizer     s1   %%b)L''-N"%"4"45F"G				! 
 # 1 1* =sF+E0E0EGS%T&6s&;%T"%TOO*+
 0L
 4:  -L
 49  !'	
/
: C  5 &U
 
s   :D>DA"DDDc                    t        j                  |      \  }}t        j                  |      \  }}|t        |      dk(  r|g fS g }	|}
|rt        nd } | t        j                  |
|      t        j                  |D cg c]  }t        ||       c}|            \  }}t        j                  |      \  }}t        |      } |t        |d   j                  |                  D ]  }|D cg c]  }|j                  ||       }} | t        j                  |
|      t        j                  ||            \  }
}t        j                  |
      \  }
}t        j                  |      \  }}|	j                  |        t        |      D cg c].  }t        j                   ||	      D cg c]  }||   	 c}      0 }}}t        j                  |
|      t        j                  ||      fS c c}w c c}w c c}w c c}}w )Nr   c                     | S r'   r(   )r>   s    r!   rK   z_fake_scan.<locals>.<lambda>  s    A r#   )r   rL   r   reversedr   r
   r   r|   r   rP   r7   stack)r   r,   r    r*   r+   carry_leaves
carry_spec
inp_leavesinp_specresult_flatr   r   rJ   r   r   dummy_out_leavesdummy_out_spec
num_leavesrs   r   r   	leave_indr   resultss                           r!   
_fake_scanr     s   %2248L*!..r2J	zS_)RxKE+B'eZ05?@TdC(@	
K (.':':9'E$n%&J%
1**3/01 	0:;dkk#s#;;!!%4!!"h/
q &&u-q""1%11	 z* 	2k?;aQy\;<G 
 	eZ0g~6 - A < <s$   >G/-G4G>+G97	G>9G>)r   r(   )Nr   F)=rR   r   typingr   r   r7   torch._prims_common_prims_commonrN   #torch._subclasses.functional_tensortorch.utils._pytree_pytreer   torch._Cr   torch._higher_order_ops.utilsr   r   r   r	   r
   r   r   r   r   
torch._opsr   torch._subclasses.fake_tensorr   "torch.fx.experimental.proxy_tensorr   r   r   torch.utils._python_dispatchr   _opsopsatenr"   ra   r3   r)   PyTreer`   r6   rW   rY   rC   r   r8   r   r   py_implCompositeExplicitAutogradr   Autogradr   r   py_functionalize_implr   r   r(   r#   r!   <module>r      sH        # * $ $  
 
 
 + 8 
 D zz~~!6T#Y 63 6 ~	&fmmV]].J(KK~ --	~
 	~ 
~ ~ 6==&--'(~BI  I* (EVu|| # %,, BV BV u||
	BV
 	U\\BV U\\*BVJ 	667S 8S &$$ %WT:
 	'(N )N 	  !$ 	!! !!J%r#   