
import functools
import typing
from typing import cast, Optional, Union

from typing_extensions import deprecated

import torch
from torch import Tensor
from torch.utils._foreach_utils import (
    _device_has_foreach_support,
    _group_tensors_by_device_and_dtype,
    _has_foreach_support,
)


__all__ = ["clip_grad_norm_", "clip_grad_norm", "clip_grad_value_"]


_tensor_or_tensors = Union[torch.Tensor, typing.Iterable[torch.Tensor]]


def _no_grad(func):
    """
    This wrapper is needed to avoid a circular import when using @torch.no_grad on the exposed functions
    clip_grad_norm_ and clip_grad_value_ themselves.
    """

    def _no_grad_wrapper(*args, **kwargs):
        with torch.no_grad():
            return func(*args, **kwargs)

    functools.update_wrapper(_no_grad_wrapper, func)
    return _no_grad_wrapper


@_no_grad
def _get_total_norm(
    tensors: _tensor_or_tensors,
    norm_type: float = 2.0,
    error_if_nonfinite: bool = False,
    foreach: Optional[bool] = None,
) -> torch.Tensor:
    r"""Compute the norm of an iterable of tensors.

    The norm is computed over the norms of the individual tensors, as if the norms of
    the individual tensors were concatenated into a single vector.

    Args:
        tensors (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will be normalized
        norm_type (float): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.
        error_if_nonfinite (bool): if True, an error is thrown if the total
            norm of :attr:`tensors` is ``nan``, ``inf``, or ``-inf``.
            Default: ``False``
        foreach (bool): use the faster foreach-based implementation.
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and silently
            fall back to the slow implementation for other device types.
            Default: ``None``

    Returns:
        Total norm of the tensors (viewed as a single vector).
    """
    if isinstance(tensors, torch.Tensor):
        tensors = [tensors]
    else:
        tensors = list(tensors)
    norm_type = float(norm_type)
    if len(tensors) == 0:
        return torch.tensor(0.0)
    first_device = tensors[0].device
    grouped_tensors = _group_tensors_by_device_and_dtype([tensors])  # type: ignore[list-item]

    norms: list[Tensor] = []
    for (device, _), ([device_tensors], _) in grouped_tensors.items():
        if (foreach is None and _has_foreach_support(device_tensors, device)) or (
            foreach and _device_has_foreach_support(device)
        ):
            norms.extend(torch._foreach_norm(device_tensors, norm_type))
        elif foreach:
            raise RuntimeError(
                f"foreach=True was passed, but can't use the foreach API on {device.type} tensors"
            )
        else:
            norms.extend(
                [torch.linalg.vector_norm(g, norm_type) for g in device_tensors]
            )

    total_norm = torch.linalg.vector_norm(
        torch.stack([norm.to(first_device) for norm in norms]), norm_type
    )

    if error_if_nonfinite and torch.logical_or(total_norm.isnan(), total_norm.isinf()):
        raise RuntimeError(
            f"The total norm of order {norm_type} for gradients from "
            "`parameters` is non-finite, so it cannot be clipped. To disable "
            "this error and scale the gradients by the non-finite norm anyway, "
            "set `error_if_nonfinite=False`"
        )
    return total_norm


@_no_grad
def _clip_grads_with_norm_(
    parameters: _tensor_or_tensors,
    max_norm: float,
    total_norm: torch.Tensor,
    foreach: Optional[bool] = None,
) -> None:
    r"""Scale the gradients of an iterable of parameters given a pre-calculated total norm and desired max norm.

    The gradients will be scaled by the following calculation

    .. math::
        grad = grad * \frac{max\_norm}{total\_norm + 1e-6}
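
    The scaling coefficient is clamped at ``1.0``, so gradients whose total norm is
    already at or below ``max_norm`` are left unchanged.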

    Gradients are modified in-place.

    This function is equivalent to :func:`torch.nn.utils.clip_grad_norm_` with a pre-calculated
    total norm.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        max_norm (float): max norm of the gradients
        total_norm (Tensor): total norm of the gradients to use for clipping
        foreach (bool): use the faster foreach-based implementation.
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and silently
            fall back to the slow implementation for other device types.
            Default: ``None``

    Returns:
        None
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    grads = [p.grad for p in parameters if p.grad is not None]
    max_norm = float(max_norm)
    if len(grads) == 0:
        return
    grouped_grads = _group_tensors_by_device_and_dtype([grads])  # type: ignore[list-item]

    clip_coef = max_norm / (total_norm + 1e-6)
    # Multiplying by the clamped coefficient is a no-op when it equals 1.0, but doing it
    # unconditionally avoids a data-dependent `if clip_coef < 1:` branch, which could force
    # a CPU <=> device synchronization when the gradients live on an accelerator.
    clip_coef_clamped = torch.clamp(clip_coef, max=1.0)
    for (device, _), ([device_grads], _) in grouped_grads.items():
        if (foreach is None and _has_foreach_support(device_grads, device)) or (
            foreach and _device_has_foreach_support(device)
        ):
            torch._foreach_mul_(device_grads, clip_coef_clamped.to(device))
        elif foreach:
            raise RuntimeError(
                f"foreach=True was passed, but can't use the foreach API on {device.type} tensors"
            )
        else:
            clip_coef_clamped_device = clip_coef_clamped.to(device)
            for g in device_grads:
                g.mul_(clip_coef_clamped_device)


@_no_grad
def clip_grad_norm_(
    parameters: _tensor_or_tensors,
    max_norm: float,
    norm_type: float = 2.0,
    error_if_nonfinite: bool = False,
    foreach: Optional[bool] = None,
) -> torch.Tensor:
    r"""Clip the gradient norm of an iterable of parameters.

    The norm is computed over the norms of the individual gradients of all parameters,
    as if the norms of the individual gradients were concatenated into a single vector.
    Gradients are modified in-place.

    This function is equivalent to :func:`torch.nn.utils.get_total_norm` followed by
    :func:`torch.nn.utils.clip_grads_with_norm_` with the ``total_norm`` returned by ``get_total_norm``.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        max_norm (float): max norm of the gradients
        norm_type (float): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.
        error_if_nonfinite (bool): if True, an error is thrown if the total
            norm of the gradients from :attr:`parameters` is ``nan``,
            ``inf``, or ``-inf``. Default: False (will switch to True in the future)
        foreach (bool): use the faster foreach-based implementation.
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and silently
            fall back to the slow implementation for other device types.
            Default: ``None``

    Returns:
        Total norm of the parameter gradients (viewed as a single vector).
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    else:
        parameters = list(parameters)
    grads = [p.grad for p in parameters if p.grad is not None]
    total_norm = _get_total_norm(grads, norm_type, error_if_nonfinite, foreach)
    _clip_grads_with_norm_(parameters, max_norm, total_norm, foreach)
    return total_norm


@deprecated(
    "`torch.nn.utils.clip_grad_norm` is now deprecated "
    "in favor of `torch.nn.utils.clip_grad_norm_`.",
    category=FutureWarning,
)
def clip_grad_norm(
    parameters: _tensor_or_tensors,
    max_norm: float,
    norm_type: float = 2.0,
    error_if_nonfinite: bool = False,
    foreach: Optional[bool] = None,
) -> torch.Tensor:
    r"""Clip the gradient norm of an iterable of parameters.

    .. warning::
        This method is now deprecated in favor of
        :func:`torch.nn.utils.clip_grad_norm_`.
    """
    return clip_grad_norm_(parameters, max_norm, norm_type, error_if_nonfinite, foreach)


@_no_grad
def clip_grad_value_(
    parameters: _tensor_or_tensors,
    clip_value: float,
    foreach: Optional[bool] = None,
) -> None:
    r"""Clip the gradients of an iterable of parameters at specified value.

    Gradients are modified in-place.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        clip_value (float): maximum allowed value of the gradients.
            The gradients are clipped in the range
            :math:`\left[\text{-clip\_value}, \text{clip\_value}\right]`
        foreach (bool): use the faster foreach-based implementation
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and
            silently fall back to the slow implementation for other device types.
            Default: ``None``
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    clip_value = float(clip_value)

    grads = [p.grad for p in parameters if p.grad is not None]
    grouped_grads = _group_tensors_by_device_and_dtype([grads])  # type: ignore[list-item]

    for (device, _), ([grads], _) in grouped_grads.items():
        if (
            foreach is None
            and _has_foreach_support(cast(list[Tensor], grads), device=device)
        ) or (foreach and _device_has_foreach_support(device)):
            torch._foreach_clamp_min_(cast(list[Tensor], grads), -clip_value)
            torch._foreach_clamp_max_(cast(list[Tensor], grads), clip_value)
        elif foreach:
            raise RuntimeError(
                f"foreach=True was passed, but can't use the foreach API on {device.type} tensors"
            )
        else:
            for grad in grads:
                cast(Tensor, grad).clamp_(min=-clip_value, max=clip_value)
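

# ---------------------------------------------------------------------------
# Illustrative usage sketch (editorial addition, not part of the upstream
# torch.nn.utils.clip_grad module): shows how the helpers above are typically
# called from a training step. The model, data, and hyperparameters below are
# placeholders chosen only for the demonstration.
if __name__ == "__main__":
    model = torch.nn.Linear(8, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    inputs = torch.randn(4, 8)
    targets = torch.randn(4, 2)

    loss = torch.nn.functional.mse_loss(model(inputs), targets)
    loss.backward()

    # Clip by total L2 norm; the returned pre-clipping norm is handy for logging.
    total_norm = clip_grad_norm_(model.parameters(), max_norm=1.0)
    print(f"grad norm before clipping: {total_norm:.4f}")

    # Alternatively, clamp each gradient element into [-0.5, 0.5] in-place.
    clip_grad_value_(model.parameters(), clip_value=0.5)

    optimizer.step()
    optimizer.zero_grad()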