
    2Vh(                         d dl mZ d dl mZ d dl mZ d dl mZ d dl mZ d dlmZ d dlm	Z	  ed       G d	 d
e	             Z
y)    )backend)constraints)initializers)ops)regularizers)keras_export)Layerzkeras.layers.LayerNormalizationc                   V     e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 d fd	Zd Zd Zd Z fdZ xZ	S )LayerNormalizationa  Layer normalization layer (Ba et al., 2016).

    Normalize the activations of the previous layer for each given example in a
    batch independently, rather than across a batch like Batch Normalization.
    i.e. applies a transformation that maintains the mean activation within each
    example close to 0 and the activation standard deviation close to 1.

    If `scale` or `center` are enabled, the layer will scale the normalized
    outputs by broadcasting them with a trainable variable `gamma`, and center
    the outputs by broadcasting with a trainable variable `beta`. `gamma` will
    default to a ones tensor and `beta` will default to a zeros tensor, so that
    centering and scaling are no-ops before training has begun.

    So, with scaling and centering enabled the normalization equations
    are as follows:

    Let the intermediate activations for a mini-batch to be the `inputs`.

    For each sample `x_i` in `inputs` with `k` features, we compute the mean and
    variance of the sample:

    ```python
    mean_i = sum(x_i[j] for j in range(k)) / k
    var_i = sum((x_i[j] - mean_i) ** 2 for j in range(k)) / k
    ```

    and then compute a normalized `x_i_normalized`, including a small factor
    `epsilon` for numerical stability.

    ```python
    x_i_normalized = (x_i - mean_i) / sqrt(var_i + epsilon)
    ```

    And finally `x_i_normalized ` is linearly transformed by `gamma` and `beta`,
    which are learned parameters:

    ```python
    output_i = x_i_normalized * gamma + beta
    ```

    `gamma` and `beta` will span the axes of `inputs` specified in `axis`, and
    this part of the inputs' shape must be fully defined.

    For example:

    >>> layer = keras.layers.LayerNormalization(axis=[1, 2, 3])
    >>> layer.build([5, 20, 30, 40])
    >>> print(layer.beta.shape)
    (20, 30, 40)
    >>> print(layer.gamma.shape)
    (20, 30, 40)

    Note that other implementations of layer normalization may choose to define
    `gamma` and `beta` over a separate set of axes from the axes being
    normalized across. For example, Group Normalization
    ([Wu et al. 2018](https://arxiv.org/abs/1803.08494)) with group size of 1
    corresponds to a Layer Normalization that normalizes across height, width,
    and channel and has `gamma` and `beta` span only the channel dimension.
    So, this Layer Normalization implementation will not match a Group
    Normalization layer with group size set to 1.

    Args:
        axis: Integer or List/Tuple. The axis or axes to normalize across.
            Typically, this is the features axis/axes. The left-out axes are
            typically the batch axis/axes. `-1` is the last dimension in the
            input. Defaults to `-1`.
        epsilon: Small float added to variance to avoid dividing by zero.
            Defaults to 1e-3.
        center: If True, add offset of `beta` to normalized tensor. If False,
            `beta` is ignored. Defaults to `True`.
        scale: If True, multiply by `gamma`. If False, `gamma` is not used.
            When the next layer is linear (also e.g. `nn.relu`), this can be
            disabled since the scaling will be done by the next layer.
            Defaults to `True`.
        rms_scaling: If True, `center` and `scale` are ignored, and the
            inputs are scaled by `gamma` and the inverse square root
            of the square of all inputs. This is an approximate and faster
            approach that avoids ever computing the mean of the input. Note that
            this *isn't* equivalent to the computation that the
            `keras.layers.RMSNormalization` layer performs.
        beta_initializer: Initializer for the beta weight. Defaults to zeros.
        gamma_initializer: Initializer for the gamma weight. Defaults to ones.
        beta_regularizer: Optional regularizer for the beta weight.
            None by default.
        gamma_regularizer: Optional regularizer for the gamma weight.
            None by default.
        beta_constraint: Optional constraint for the beta weight.
            None by default.
        gamma_constraint: Optional constraint for the gamma weight.
            None by default.
        **kwargs: Base layer keyword arguments (e.g. `name` and `dtype`).


    Reference:

    - [Lei Ba et al., 2016](https://arxiv.org/abs/1607.06450).
    c                 J   t        |   di | t        |t        t        f      rt        |      | _        n&t        |t              r|| _        nt        d|z        || _        || _	        || _
        || _        t        j                  |      | _        t        j                  |      | _        t!        j                  |      | _        t!        j                  |	      | _        t'        j                  |
      | _        t'        j                  |      | _        d| _        d| _        y )NzQExpected an int or a list/tuple of ints for the argument 'axis', but received: %rTF )super__init__
isinstancelisttupleaxisint	TypeErrorepsiloncenterscalerms_scalingr   getbeta_initializergamma_initializerr   beta_regularizergamma_regularizerr   beta_constraintgamma_constraintsupports_maskingautocast)selfr   r   r   r   r   r   r   r   r   r   r    kwargs	__class__s                b/home/dcms/DCMS/lib/python3.12/site-packages/keras/src/layers/normalization/layer_normalization.pyr   zLayerNormalization.__init__n   s     	"6"dT5M*T
DIc"DI46:; 
 
& , 0 01A B!-!1!12C!D , 0 01A B!-!1!12C!D*? +0@ A $    c           	      @   t        | j                  t              r't        | j                  D cg c]  }||   	 c}      }n"|| j                     f}| j                  g| _        | j                  s| j
                  r<| j                  d|| j                  | j                  | j                  dd      | _
        nd | _
        | j                  rH| j
                  s<| j                  d|| j                  | j                  | j                  dd      | _        y d | _        y c c}w )NgammaTF)nameshapeinitializerregularizer
constraint	trainabler"   beta)r   r   r   r   r   r   
add_weightr   r   r    r)   r   r   r   r   r0   )r#   input_shapedimr+   s       r&   buildzLayerNormalization.build   s    dii&tyyA;s+ABE +-EDI::)) 22 2200 ) DJ DJ;;t// 11 11// ( DI DI9 Bs   Dc                     |j                   }t        |      dgz   j                  D ]
  }||   |<     fd}t        j                  |j
                  d      }t        j                  ||      } j                  r{t        j                  | j                  d      }t        j                  | j                  z         }||z  t        j                   | j                        |j
                        z  }nt        j                  | j                  d      \  }	} | j                         | j                        }}
t        j                  | j                  z         }|
%t        j                  |
|j
                        }
||
z  }|	 |z  }|%t        j                  ||j
                        }||z   }||z  |z   }t        j                  | j                        S )N   c                     | At        | j                        k7  r)j                  dz
  gk7  rt        j                  |       S | S )Nr6   )lenr+   r   r   reshape)vbroadcast_shapendimsr#   s    r&   
_broadcastz+LayerNormalization.call.<locals>._broadcast   s@    LE)II%!),{{1o66Hr'   float32T)r   keepdims)axesr?   )r+   r8   r   r   result_typedtyper   castr   varrsqrtr   r)   momentsr0   compute_dtype)r#   inputsr2   r3   r=   rG   varianceinvoutputsmeanr)   r0   resr;   r<   s   `            @@r&   callzLayerNormalization.call   s   llK  #+99 	4C#.s#3OC 	4	  ++FLL)D &-0 wwvDIIEH))Ht||34C sxx
4::(>MM 
 ![[dii$OND($TZZ0*TYY2G4E))Ht||34C 5Ek%#+Cxxfll3DjslS(Gxx!3!344r'   c           	          t        | j                  t              r| j                  g}n| j                  }|D ]=  }|t        |      k\  s|t        |       k  s!t	        d| d| d| j                          |S )NzAxis z" is out of bounds for input shape z. Received: axis=)r   r   r   r8   
ValueError)r#   r2   r@   r   s       r&   compute_output_shapez'LayerNormalization.compute_output_shape   s    dii%II;D99D 	Ds;''43{3C2C+C D6 "##.- 0&&*ii[2 	 r'   c                    | j                   | j                  | j                  | j                  | j                  t        j                  | j                        t        j                  | j                        t        j                  | j                        t        j                  | j                        t        j                  | j                        t        j                  | j                        d}t        | A         }i ||S )N)r   r   r   r   r   r   r   r   r   r   r    )r   r   r   r   r   r   	serializer   r   r   r   r   r   r   r    r   
get_config)r#   configbase_configr%   s      r&   rT   zLayerNormalization.get_config   s    II||kkZZ++ , 6 6t7L7L M!-!7!78N8N!O , 6 6t7L7L M!-!7!78N8N!O*44T5I5IJ + 5 5d6K6K L
 g(*(+(((r'   )gMbP?TTFzerosonesNNNN)
__name__
__module____qualname____doc__r   r4   rN   rQ   rT   __classcell__)r%   s   @r&   r   r   
   sN    `H   &P@35j) )r'   r   N)	keras.srcr   r   r   r   r   keras.src.api_exportr   keras.src.layers.layerr	   r   r   r'   r&   <module>rb      s>     ! "  " - ( /0~) ~) 1~)r'   