import copy
from collections.abc import Iterable, Sequence
from typing import Any, Callable, NoReturn, Union

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn.utils._named_member_accessor import NamedMemberAccessor


# Utilities for making an nn.Module "functional": the state (parameters and
# buffers) is extracted out of a module so that the module can be evaluated
# as a pure function of that state.


def raise_parameter_tying_error() -> NoReturn:
    raise RuntimeError(
        "make_functional(module): we don't yet support models that "
        "do parameter tying (also sometimes known as weight sharing). "
        "Please try to rewrite your model by replacing all instances of the "
        "tied parameter with another and/or comment your support in "
        "https://github.com/pytorch/functorch/issues/446"
    )


def create_names_map(
    named_params: Union[dict[str, Tensor], Iterable[tuple[str, Tensor]]],
    tied_named_params: Union[dict[str, Tensor], Iterable[tuple[str, Tensor]]],
) -> dict[str, list[str]]:
    """
    named_params is a dictionary of tensors: {'A': A, 'B': B}
    tied_named_params is another dictionary of tensors {'A': A, 'B': B, 'B_tied': B}
    with potentially tied (or 'duplicated') tensors

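    For example, with ``B`` shared under a second, tied name (an illustrative
    sketch; tensors are matched by identity, so ``B`` must be the same object):
    ```
    A, B = torch.randn(3), torch.randn(3)
    create_names_map({'A': A, 'B': B}, {'A': A, 'B': B, 'B_tied': B})
    ```
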
    This function creates a mapping from the names in named_params to the
    names in tied_named_params: {'A': ['A'], 'B': ['B', 'B_tied']}.
    """
    named_params = dict(named_params)
    tied_named_params = dict(tied_named_params)

    tensors_dict_keys = set(named_params.keys())
    tied_tensors_dict_keys = set(tied_named_params.keys())
    assert tensors_dict_keys.issubset(tied_tensors_dict_keys)

    # Tensors hash by identity, so tied (duplicated) tensors collapse onto a
    # single entry whose list accumulates every name they appear under.
    tensor_to_mapping: dict[Tensor, tuple[str, list[str]]] = {}
    for key, tensor in named_params.items():
        tensor_to_mapping[tensor] = (key, [])
    for key, tensor in tied_named_params.items():
        assert tensor in tensor_to_mapping
        tensor_to_mapping[tensor][1].append(key)
    return dict(tensor_to_mapping.values())


def _extract_members(
    mod: nn.Module,
    named_members: Callable[..., Iterable[tuple[str, Tensor]]],
    subclass: Callable[[Tensor], Tensor],
) -> tuple[tuple[Tensor, ...], tuple[str, ...], dict[str, list[str]]]:
    all_named_members = tuple(named_members(remove_duplicate=False))
    unique_named_members = tuple(named_members(remove_duplicate=True))
    names_map = create_names_map(unique_named_members, all_named_members)

    # Swap each member out for a meta-device placeholder; tied members share
    # the same placeholder via the memo dict.
    memo = {}
    accessor = NamedMemberAccessor(mod)
    for name, p in all_named_members:
        if p not in memo:
            memo[p] = subclass(torch.empty_like(p, device="meta"))
        replacement = memo[p]
        accessor.set_tensor(name, replacement)

    if len(unique_named_members) == 0:
        names, params = (), ()
    else:
        names, params = zip(*unique_named_members)
    return params, names, names_map


def extract_weights(
    mod: nn.Module,
) -> tuple[tuple[Tensor, ...], tuple[str, ...], dict[str, list[str]]]:
    """
    This function removes all the Parameters from the model and
    returns them as a tuple as well as their original attribute names.
    The weights must be re-loaded with `load_weights` before the model
    can be used again.
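
    For illustration, a round trip might look like this (an added sketch):
    ```
    model = nn.Linear(3, 3)
    params, names, names_map = extract_weights(model)
    # names == ('weight', 'bias'); the model now holds meta-tensor placeholders
    load_weights(model, names, params)
    ```
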
    Note that this function modifies the model in place and after this
    call, mod.parameters() will be empty.
    """
    return _extract_members(mod, mod.named_parameters, nn.Parameter)


def extract_buffers(
    mod: nn.Module,
) -> tuple[tuple[Tensor, ...], tuple[str, ...], dict[str, list[str]]]:
    return _extract_members(mod, mod.named_buffers, lambda x: x)


def load_weights(
    mod: nn.Module,
    names: Sequence[str],
    params: Sequence[Tensor],
    as_params: bool = False,
) -> None:
    """
    Reload a set of weights so that `mod` can be used again to perform a forward pass.
    Note that the `params` are regular Tensors (that can have history) and so are left
    as Tensors. This means that mod.parameters() will still be empty after this call.
    """
    accessor = NamedMemberAccessor(mod)
    if as_params:
        params = [nn.Parameter(p) for p in params]
    accessor.set_tensors(names, params)


def _swap_state(
    mod: nn.Module, names_map: dict[str, list[str]], elems: Iterable[Tensor]
) -> list[Tensor]:
    result: list[Tensor] = []
    accessor = NamedMemberAccessor(mod)
    for (_, attr_names), elem in zip(names_map.items(), elems):
        for i, attr_name in enumerate(attr_names):
            # The first name swaps (and records the old tensor); any further
            # names are tied duplicates that get set to the same tensor.
            if i == 0:
                result.append(accessor.swap_tensor(attr_name, elem))
            else:
                accessor.set_tensor(attr_name, elem)
    return result


def load_buffers(
    mod: nn.Module,
    names: Sequence[str],
    buffers: Sequence[Tensor],
    as_params: bool = False,
) -> None:
    # `as_params` is accepted for symmetry with `load_weights`; buffers are
    # always loaded as plain tensors.
    accessor = NamedMemberAccessor(mod)
    accessor.set_tensors(names, buffers)


def load_state(
    model: nn.Module,
    weights: Sequence[Tensor],
    weight_names: Sequence[str],
    buffers: Sequence[Tensor] = (),
    buffer_names: Sequence[str] = (),
) -> nn.Module:
    """load_state(model, weights, weight_names, buffers=(), buffer_names=()) -> model

    load_state takes `weights` and `buffers` and assigns them to the model.
    This is the inverse operation of `make_functional_deprecated_v1`.
    """
    assert len(weight_names) == len(weights)
    load_weights(model, weight_names, weights)
    if len(buffers) > 0:
        assert len(buffer_names) == len(buffers)
        load_buffers(model, buffer_names, buffers)
    return model


def make_functional_deprecated_v1(model: nn.Module):
    """make_functional_deprecated_v1(model) -> weights, func, weight_names

    Given an nn.Module, make_functional_deprecated_v1 extracts the state (weights)
    and returns a functional version of the model, `func`. This makes
    it so that it is possible to use transforms over the parameters of
    `model`.

    `func` can be invoked as follows:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, func, _ = make_functional_deprecated_v1(model)
    func(weights, (x,))
    ```

    And here is an example of applying the grad transform:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, _, func = make_functional_deprecated_v1(model)
    grad_weights = grad(func)(weights, (x,))
    ```

    To put the state back into a model, use `load_state`.
    """
    buffers = list(model.buffers())
    if len(buffers) > 0:
        raise RuntimeError(
            "make_functional_deprecated_v1(model): `model` has buffers. Please use "
            "make_functional_with_buffers_deprecated_v1(model) instead."
        )
    weights, descriptors, _ = extract_weights(model)

    def fun(weights, data):
        mutable_model = copy.deepcopy(model)
        load_weights(mutable_model, descriptors, weights)
        return mutable_model(*data)

    return weights, fun, descriptors


def make_functional_with_buffers_deprecated_v1(model: nn.Module):
    """make_functional_with_buffers_deprecated_v1(model) -> weights, buffers, func, weight_names, buffer_names

    Given an nn.Module, make_functional_with_buffers_deprecated_v1 extracts the state (weights and buffers)
    and returns a functional version of the model, `func`.

    `func` can be invoked as follows:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, buffers, func, _, _ = make_functional_with_buffers_deprecated_v1(model)
    func(weights, buffers, (x,))
    ```

    And here is an example of applying the grad transform:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, buffers, func, _, _ = make_functional_with_buffers_deprecated_v1(model)
    func(weights, buffers, (x,))
    grad_weights = grad(func)(weights, buffers, (x,))
    ```

    To put the state back into a model, use `load_state`.
    """
    weights, weight_descriptors, _ = extract_weights(model)
    buffers, buf_descriptors, _ = extract_buffers(model)

    def fun(weights, buffers, data):
        mutable_model = copy.deepcopy(model)
        load_weights(mutable_model, weight_descriptors, weights)
        load_buffers(mutable_model, buf_descriptors, buffers)
        return mutable_model(*data)

    return weights, buffers, fun, weight_descriptors, buf_descriptors


class FunctionalModuleWithBuffers(nn.Module):
    """
    This is the callable object returned by :func:`make_functional_with_buffers`.
    """

    def __init__(
        self,
        stateless_model: nn.Module,
        param_names: tuple[str, ...],
        buffer_names: tuple[str, ...],
        param_names_map: dict[str, list[str]],
        buffer_names_map: dict[str, list[str]],
    ) -> None:
        super().__init__()
        self.stateless_model = stateless_model
        self.param_names = param_names
        self.buffer_names = buffer_names

        self.all_names_map = dict(param_names_map)
        self.all_names_map.update(buffer_names_map)

    @staticmethod
    def _create_from(
        model: nn.Module, disable_autograd_tracking: bool = False
    ) -> tuple["FunctionalModuleWithBuffers", tuple[Tensor, ...], tuple[Tensor, ...]]:
        # TODO: We don't need to copy the model to create a stateless copy
        model_copy = copy.deepcopy(model)
        params, param_names, param_names_map = extract_weights(model_copy)
        buffers, buffer_names, buffer_names_map = extract_buffers(model_copy)
        if disable_autograd_tracking:
            for param in params:
                param.requires_grad_(False)
        return (
            FunctionalModuleWithBuffers(
                model_copy,
                param_names,
                buffer_names,
                param_names_map,
                buffer_names_map,
            ),
            params,
            buffers,
        )

    def forward(
        self, params: Iterable[Tensor], buffers: Iterable[Tensor], *args, **kwargs
    ) -> Any:
        # Temporarily load the state back onto self.stateless_model
        old_state = _swap_state(
            self.stateless_model,
            self.all_names_map,
            tuple(params) + tuple(buffers),
        )
        try:
            return self.stateless_model(*args, **kwargs)
        finally:
            # Remove the loaded state on self.stateless_model
            _swap_state(self.stateless_model, self.all_names_map, old_state)


class FunctionalModule(nn.Module):
    """
    This is the callable object returned by :func:`make_functional`.
    """

    def __init__(
        self,
        stateless_model: nn.Module,
        param_names: tuple[str, ...],
        names_map: dict[str, list[str]],
    ) -> None:
        super().__init__()
        self.stateless_model = stateless_model
        self.param_names = param_names
        self.names_map = names_map

    @staticmethod
    def _create_from(
        model: nn.Module, disable_autograd_tracking: bool = False
    ) -> tuple["FunctionalModule", tuple[Tensor, ...]]:
        # TODO: We don't need to copy the model to create a stateless copy
        model_copy = copy.deepcopy(model)
        params, param_names, names_map = extract_weights(model_copy)
        if disable_autograd_tracking:
            for param in params:
                param.requires_grad_(False)
        return FunctionalModule(model_copy, param_names, names_map), params

    def forward(self, params: Iterable[Tensor], *args, **kwargs) -> Any:
        # Temporarily load the state back onto self.stateless_model
        old_state = _swap_state(self.stateless_model, self.names_map, params)
        try:
            return self.stateless_model(*args, **kwargs)
        finally:
            # Remove the loaded state on self.stateless_model
            _swap_state(self.stateless_model, self.names_map, old_state)


def make_functional(
    model: nn.Module, disable_autograd_tracking: bool = False
) -> tuple[FunctionalModule, tuple[Tensor, ...]]:
    """make_functional(model, disable_autograd_tracking=False) -> func, params

    Given a ``torch.nn.Module``, :func:`make_functional` extracts the state
    (params) and returns a functional version of the model, ``func``. This
    makes it so that it is possible to use transforms over the parameters of
    ``model``.

    ``func`` can be invoked as follows:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional

        x = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params = make_functional(model)
        func(params, x)

    And here is an example of applying the grad transform over the parameters
    of a model.

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional, grad

        x = torch.randn(4, 3)
        t = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params = make_functional(model)

        def compute_loss(params, x, t):
            y = func(params, x)
            return nn.functional.mse_loss(y, t)

        grad_weights = grad(compute_loss)(params, x, t)

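    Because the returned ``params`` is a plain tuple of tensors, a manual
    update step can be written as a pure function of it (an illustrative
    sketch building on the example above):

    .. code-block:: python

        new_params = tuple(p - 0.1 * g for p, g in zip(params, grad_weights))
        y = func(new_params, x)
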
    If the model has any buffers, please use :func:`make_functional_with_buffers` instead.

    Args:
        model (torch.nn.Module): Input model.
        disable_autograd_tracking (bool): Flag to disable gradient tracking for output parameters.
            The returned params are unrelated to the set of params from the original model. If False (default),
            the params will have ``requires_grad=True`` on them (aka they will be trackable with regular
            PyTorch autograd), matching the requires_grad-ness of the params from the original model.
            Otherwise, the returned params will have ``requires_grad=False``. Default: False.
            If you plan on using regular PyTorch autograd (e.g., if you want to call ``.backward()`` or
            ``torch.autograd.grad()``), then set ``disable_autograd_tracking=False``.
            Otherwise, if you're only planning on using functorch's gradient transforms,
            then please set ``disable_autograd_tracking=True`` to avoid unnecessarily tracking
            history with PyTorch autograd.

    """
    buffers = list(model.buffers())
    if len(buffers) > 0:
        raise RuntimeError(
            "make_functional(model): `model` has buffers. Please use "
            "make_functional_with_buffers(model) instead."
        )
    return FunctionalModule._create_from(
        model, disable_autograd_tracking=disable_autograd_tracking
    )


def make_functional_with_buffers(
    model: nn.Module, disable_autograd_tracking: bool = False
) -> tuple[FunctionalModuleWithBuffers, tuple[Tensor, ...], tuple[Tensor, ...]]:
    """make_functional_with_buffers(model, disable_autograd_tracking=False) -> func, params, buffers

    Given a ``torch.nn.Module``, make_functional_with_buffers extracts the
    state (params and buffers) and returns a functional version of the model,
    ``func``, that can be invoked like a function.

    ``func`` can be invoked as follows:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional_with_buffers

        x = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params, buffers = make_functional_with_buffers(model)
        func(params, buffers, x)

    And here is an example of applying the grad transform over the parameters
    of a model:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional_with_buffers, grad

        x = torch.randn(4, 3)
        t = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params, buffers = make_functional_with_buffers(model)

        def compute_loss(params, buffers, x, t):
            y = func(params, buffers, x)
            return nn.functional.mse_loss(y, t)

        grad_weights = grad(compute_loss)(params, buffers, x, t)

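    As with :func:`make_functional`, ``params`` and ``buffers`` are plain
    tuples of tensors, so a manual update step can be written the same way
    (an illustrative sketch building on the example above):

    .. code-block:: python

        new_params = tuple(p - 0.1 * g for p, g in zip(params, grad_weights))
        y = func(new_params, buffers, x)
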
    Args:
        model (torch.nn.Module): Input model.
        disable_autograd_tracking (bool): Flag to disable gradient tracking for output parameters.
            The returned params are unrelated to the set of params from the original model. If False (default),
            the params will have ``requires_grad=True`` on them (aka they will be trackable with regular
            PyTorch autograd), matching the requires_grad-ness of the params from the original model.
            Otherwise, the returned params will have ``requires_grad=False``. Default: False.
            If you plan on using regular PyTorch autograd (e.g., if you want to call ``.backward()`` or
            ``torch.autograd.grad()``), then set ``disable_autograd_tracking=False``.
            Otherwise, if you're only planning on using functorch's gradient transforms,
            then please set ``disable_autograd_tracking=True`` to avoid unnecessarily tracking
            history with PyTorch autograd.

    """
    return FunctionalModuleWithBuffers._create_from(
        model, disable_autograd_tracking=disable_autograd_tracking
    )


def transpose_stack(
    tuple_of_tuple_of_tensors: tuple[tuple[Tensor, ...], ...],
) -> tuple[Tensor, ...]:
    # Regroup per-model tuples into per-parameter tuples, then stack each
    # group along a new leading (model) dimension.
    tuple_of_tuple_of_tensors = tuple(zip(*tuple_of_tuple_of_tensors))
    results = tuple(
        torch.stack(shards).detach() for shards in tuple_of_tuple_of_tensors
    )
    return results


def combine_state_for_ensemble(
    models: Sequence[nn.Module],
) -> tuple[FunctionalModuleWithBuffers, tuple[Tensor, ...], tuple[Tensor, ...]]:
    """combine_state_for_ensemble(models) -> func, params, buffers

    Prepares a list of torch.nn.Modules for ensembling with :func:`vmap`.

    Given a list of ``M`` ``nn.Modules`` of the same class, stacks all of their
    parameters and buffers together to make ``params`` and ``buffers``.
    Each parameter and buffer in the result will have an additional dimension
    of size ``M``.

    :func:`combine_state_for_ensemble` also returns ``func``, a functional
    version of one of the models in :attr:`models`. One cannot run
    ``func(params, buffers, *args, **kwargs)`` directly; you probably want to
    use ``vmap(func, ...)(params, buffers, *args, **kwargs)``.

    Here's an example of how to ensemble over a very simple model:

    .. code-block:: python

        num_models = 5
        batch_size = 64
        in_features, out_features = 3, 3
        models = [torch.nn.Linear(in_features, out_features) for i in range(num_models)]
        data = torch.randn(batch_size, 3)

        fmodel, params, buffers = combine_state_for_ensemble(models)
        output = vmap(fmodel, (0, 0, None))(params, buffers, data)

        assert output.shape == (num_models, batch_size, out_features)

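    The ``in_dims`` of ``(0, 0, None)`` above map over the leading (model)
    dimension of ``params`` and ``buffers`` and broadcast ``data`` to every
    ensemble member. To give each member its own minibatch instead (an
    illustrative sketch):

    .. code-block:: python

        per_model_data = torch.randn(num_models, batch_size, 3)
        output = vmap(fmodel, (0, 0, 0))(params, buffers, per_model_data)
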
    .. warning::
        All of the modules being stacked together must be the same (except for
        the values of their parameters/buffers). For example, they should be in the
        same mode (training vs eval).

        This API is subject to change -- we're investigating better ways to
        create ensembles and would love your feedback on how to improve this.
    """
    if len(models) == 0:
        raise RuntimeError(
            "combine_state_for_ensemble: Expected at least one model, got 0."
        )
    if not (all(m.training for m in models) or all(not m.training for m in models)):
        raise RuntimeError(
            "combine_state_for_ensemble: Expected all models to "
            "have the same training/eval mode."
        )
    model0_typ = type(models[0])
    if not all(type(m) == model0_typ for m in models):
        raise RuntimeError(
            "combine_state_for_ensemble: Expected all models to be of the same class."
        )
    funcs, params, buffers = zip(
        *[make_functional_with_buffers(model) for model in models]
    )
    params = transpose_stack(params)
    buffers = transpose_stack(buffers)
    return funcs[0], params, buffers


def functional_init(
    model_class: type[nn.Module],
    ensemble_shape: Union[tuple[()], tuple[int]] = (),
    device: torch.types.Device = "cpu",
):
    def wrapped(*args, **kwargs):
        if len(ensemble_shape) >= 2:
            raise ValueError("NYI: ensemble_shape with more than 1 element")
        if len(ensemble_shape) == 0:
            model = model_class(*args, **kwargs).to(device)
            return make_functional_deprecated_v1(model)
        num_models = ensemble_shape[0]
        if num_models <= 0:
            raise ValueError(f"num_models {num_models} should be > 0")
        # NB: Not very efficient, more of a POC
        models = tuple(
            model_class(*args, **kwargs).to(device) for _ in range(num_models)
        )
        _, fn, names = make_functional_deprecated_v1(model_class(*args, **kwargs))
        weights = tuple(make_functional_deprecated_v1(model)[0] for model in models)
        weights = tuple(zip(*weights))
        weights = tuple(torch.stack(shards).detach() for shards in weights)
        return weights, fn, names

    return wrapped


def functional_init_with_buffers(
    model_class: type[nn.Module],
    ensemble_shape: Union[tuple[()], tuple[int]] = (),
    device: torch.types.Device = "cpu",
):
    def wrapped(*args, **kwargs):
        if len(ensemble_shape) >= 2:
            raise ValueError("NYI: ensemble_shape with more than 1 element")
        if len(ensemble_shape) == 0:
            model = model_class(*args, **kwargs).to(device)
            return make_functional_deprecated_v1(model)
        num_models = ensemble_shape[0]
        if num_models <= 0:
            raise ValueError(f"num_models {num_models} should be > 0")
        # NB: Not very efficient, more of a POC
        models = tuple(
            model_class(*args, **kwargs).to(device) for _ in range(num_models)
        )
        (
            _,
            _,
            fn,
            weight_names,
            buffer_names,
        ) = make_functional_with_buffers_deprecated_v1(model_class(*args, **kwargs))
        weights, buffers = zip(
            *tuple(
                make_functional_with_buffers_deprecated_v1(model)[:2]
                for model in models
            )
        )
        weights = tuple(zip(*weights))
        weights = tuple(torch.stack(shards).detach() for shards in weights)
        buffers = tuple(zip(*buffers))
        buffers = tuple(torch.stack(shards).detach() for shards in buffers)
        return weights, buffers, fn, weight_names, buffer_names

    return wrapped