from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.layers.attention.attention import Attention


@keras_export("keras.layers.AdditiveAttention")
class AdditiveAttention(Attention):
    """Additive attention layer, a.k.a. Bahdanau-style attention.

    Inputs are a list with 2 or 3 elements:
    1. A `query` tensor of shape `(batch_size, Tq, dim)`.
    2. A `value` tensor of shape `(batch_size, Tv, dim)`.
    3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If not
        supplied, `value` will be used as `key`.

    The calculation follows the steps:
    1. Calculate attention scores using `query` and `key` with shape
        `(batch_size, Tq, Tv)` as a non-linear sum
        `scores = reduce_sum(tanh(query + key), axis=-1)`.
    2. Use scores to calculate a softmax distribution with shape
        `(batch_size, Tq, Tv)`.
    3. Use the softmax distribution to create a linear combination of `value`
        with shape `(batch_size, Tq, dim)`.
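
    For example, step 1 expands `query` to shape `(batch_size, Tq, 1, dim)`
    and `key` to `(batch_size, 1, Tv, dim)`, so that `query + key` broadcasts
    to `(batch_size, Tq, Tv, dim)` before the `tanh` and the reduction over
    `dim`. A minimal NumPy sketch of the unscaled score in step 1 (the shapes
    and random data below are illustrative assumptions):

    >>> import numpy as np
    >>> query = np.random.random((2, 4, 8))  # (batch_size, Tq, dim)
    >>> key = np.random.random((2, 6, 8))  # (batch_size, Tv, dim)
    >>> scores = np.sum(
    ...     np.tanh(query[:, :, None, :] + key[:, None, :, :]), axis=-1
    ... )
    >>> scores.shape
    (2, 4, 6)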

    Args:
        use_scale: If `True`, will create a scalar variable to scale the
            attention scores.
        dropout: Float between 0 and 1. Fraction of the units to drop for the
            attention scores. Defaults to `0.0`.

    Call arguments:
        inputs: List of the following tensors:
            - `query`: Query tensor of shape `(batch_size, Tq, dim)`.
            - `value`: Value tensor of shape `(batch_size, Tv, dim)`.
            - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If
                not given, will use `value` for both `key` and `value`, which is
                the most common case.
        mask: List of the following tensors:
            - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`.
                If given, the output will be zero at the positions where
                `mask==False`.
            - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`.
                If given, will apply the mask such that values at positions
                where `mask==False` do not contribute to the result.
        return_attention_scores: bool, if `True`, returns the attention scores
            (after masking and softmax) as an additional output argument.
        training: Python boolean indicating whether the layer should behave in
            training mode (adding dropout) or in inference mode (no dropout).
        use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds
            a mask such that position `i` cannot attend to positions `j > i`.
            This prevents the flow of information from the future towards the
            past. Defaults to `False`.

    Output:
        Attention outputs of shape `(batch_size, Tq, dim)`.
        (Optional) Attention scores after masking and softmax with shape
            `(batch_size, Tq, Tv)`.
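
    Usage (a minimal sketch; the shapes, random inputs, and all-ones masks
    below are illustrative assumptions):

    >>> import numpy as np
    >>> import keras
    >>> query = np.random.random((2, 4, 8)).astype("float32")
    >>> value = np.random.random((2, 6, 8)).astype("float32")
    >>> layer = keras.layers.AdditiveAttention()
    >>> output = layer([query, value])  # `key` defaults to `value`
    >>> tuple(output.shape)
    (2, 4, 8)
    >>> query_mask = np.ones((2, 4), dtype="bool")
    >>> value_mask = np.ones((2, 6), dtype="bool")
    >>> masked = layer([query, value], mask=[query_mask, value_mask])
    >>> tuple(masked.shape)
    (2, 4, 8)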
    """

    def __init__(
        self,
        use_scale=True,
        dropout=0.0,
        **kwargs,
    ):
        super().__init__(use_scale=use_scale, dropout=dropout, **kwargs)

    def build(self, input_shape):
        self._validate_inputs(input_shape)
        dim = input_shape[0][-1]
        self.scale = None
        if self.use_scale:
            # Per-feature scale applied to the tanh activation before the
            # reduction over `dim`.
            self.scale = self.add_weight(
                name="scale",
                shape=[dim],
                initializer="glorot_uniform",
                dtype=self.dtype,
                trainable=True,
            )
        self.built = True

    def _calculate_scores(self, query, key):
        """Calculates attention scores as a nonlinear sum of query and key.

        Args:
            query: Query tensor of shape `(batch_size, Tq, dim)`.
            key: Key tensor of shape `(batch_size, Tv, dim)`.

        Returns:
            Tensor of shape `(batch_size, Tq, Tv)`.
        """
        # Reshape into `(batch_size, Tq, 1, dim)`.
        q_reshaped = ops.expand_dims(query, axis=-2)
        # Reshape into `(batch_size, 1, Tv, dim)`.
        k_reshaped = ops.expand_dims(key, axis=-3)
        scale = self.scale if self.use_scale else 1.0
        # Broadcast to `(batch_size, Tq, Tv, dim)`, apply the non-linearity,
        # then reduce over the feature axis to get `(batch_size, Tq, Tv)`.
        return ops.sum(scale * ops.tanh(q_reshaped + k_reshaped), axis=-1)

    def get_config(self):
        base_config = super().get_config()
        # `score_mode` is inherited from `Attention` but fixed for additive
        # attention, so it is removed from the serialized config.
        del base_config["score_mode"]
        return base_config