
    Vhq                     <    d dl Z d dlZd dlmZ d dlmZ dddddZy)    N)ExpandedWeight)_pytreesumT)
batch_sizeloss_reductionbatch_firstc                2    fdfddvrt        d       t         t        j                  j                        s!t        dt               j                         1t        t              s!t        dt              j                         dk  rt        d        j                         D ]*  }t        |d	      s|j                  t        d
| d       t        j                   j                         fd       }|S )a;	  
    Return a forward function for a module, populating grad_sample with per sample gradients on backward invocation.

    Args:
        module: The ``nn.Module`` to get per sample gradients with respect to. All trainable
          parameters will compute per sample gradients, located in a ``grad_sample``
          field when ``backward`` is invoked
        batch_size: The batch size of the input. If None is passed, all tensor arguments in args and kwargs must have
          the same batch size, which is the size of the first dimension. Otherwise, it must be passed manually.
          Default: None
        loss_reduction: Indicates if the loss reduction (for aggregating the gradients) is a sum or a mean operation. If
          "mean", per sample gradients will be scaled by the batch size to offset the crossbatch interaction from
          running mean across a batch. Must be "mean" or "sum". Default: "sum"
        batch_first: Indicates if the batch dimension is the first dimension. If True, the batch dimension is the first
          dimension. If False, it's the second dimension. Default: True.

    Examples::
        >>> # xdoctest: +SKIP
        >>> model = nn.Linear(4, 3)
        >>> batched_input = torch.randn(5, 4)  # batch size of 5
        >>> res = call_for_per_sample_grads(model)(batched_input).sum()
        >>> res.backward()
        >>> assert model.weight.shape == (3, 4)
        >>> assert model.weight.grad_sample.shape == (5, 3, 4)
        >>> assert model.weight.grad is None
        >>> assert model.bias.shape == (3,)
        >>> assert model.bias.grad_sample.shape == (5, 3)
        >>> assert model.bias.grad is None

    An example using "mean" loss reduction. The grad_sample fields will be scaled by batch_size from what they would be
    if we ran the same code with loss_reduction="sum". This is because the mean at the end will scale all
    grad_outputs by 1 / batch_size from cross batch interaction.
        >>> model = nn.Linear(4, 3)
        >>> batched_input = torch.randn(5, 4)  # batch size of 5
        >>> res = call_for_per_sample_grads(model, 5, loss_reduction="mean")(batched_input).mean()
        >>> res.backward()

    Note::
        Does not work with any `nn.RNN`, including `nn.GRU` or `nn.LSTM`. Please use custom
        rewrites that wrap an `nn.Linear` module. See Opacus for an example
    c                 :    | j                   rt        | |      S | S N)requires_gradr   )	og_tensorr   r   s     O/home/dcms/DCMS/lib/python3.12/site-packages/torch/nn/utils/_per_sample_grad.pymaybe_build_expanded_weightz>call_for_per_sample_grads.<locals>.maybe_build_expanded_weight<   s!    ""!)ZHH    c                  
   t        j                  | i |}d }|D ]X  }t        |t        j                        sr|j
                  d   n|j
                  d   }|||k7  rt        d| d| d      |}Z |t        d      |S )Nr      zDWhen computing batch size, found at least one input with batch size z and one with batch size zV. Please specify it explicitly using the batch size kwarg in call_for_per_sample_gradszUnable to find a tensor in the passed args and kwargs. They may not be pytree-able and so ExpandedWeights cannot compute the batch size from the inputs. Please specify it explicitly)pytreearg_tree_leaves
isinstancetorchTensorshapeRuntimeError)argskwargsargs_and_kwargsr   argarg_batch_sizer   s         r   compute_batch_sizez5call_for_per_sample_grads.<locals>.compute_batch_sizeB   s     00$A&A
" 	(Cc5<<0-8SYYq\ciilN%**F"Z!l";N;K LYY 
 (J	(   
 r   )r   meanz8Expected loss_reduction argument to be sum or mean, got z%Module passed must be nn.Module, got z2Batch size passed must be None or an integer, got r   z!Batch size must be positive, got grad_samplezCurrent Expanded Weights accumulates the gradients, which will be incorrect for multiple calls without clearing gradients. Please clear out the grad_sample parameter of zC or post an issue to pytorch/pytorch to prioritize correct behaviorc            	          }| | i |}	j                         D ci c]  \  }}| ||       }}}t        j                  j                  	|| |      S c c}}w r   )named_parametersr   funcfunctional_call)
r   r   wrapper_batch_sizenamevalueparamsr   r   r   modules
         r   wrapperz*call_for_per_sample_grads.<locals>.wrapperp   s{    '%!3T!DV!D "(!8!8!:
u -e5GHH
 
 zz))&&$GG	
s   A)r   r   r   nnModuletype__name__int
parametershasattrr!   	functoolswrapsforward)r*   r   r   r   weightr+   r   r   s   ````  @@r   call_for_per_sample_gradsr7      s8   b. _,F~FVW
 	
 fehhoo.3DL4I4I3JK
 	
 *Z"=@jAQAZAZ@[\
 	
 *q.>zlKLL##% 6=)f.@.@.Lccibj kRR  __V^^$	H %	H Nr   )r3   r   6torch.nn.utils._expanded_weights.expanded_weights_implr   torch.utilsr   r   r7    r   r   <module>r;      s"      Q ) qr   