
    2Vh8                     P    d dl mZ d dlmZ dZdZ ed       G d de             Zy)	    )keras_export)TFDataLayerg     @g     @zkeras.layers.MelSpectrogramc                        e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Zd Zd Zd Zd Zd Z		 	 	 	 	 	 ddZ
d	 Z fd
Z xZS )MelSpectrograma3  A preprocessing layer to convert raw audio signals to Mel spectrograms.

    This layer takes `float32`/`float64` single or batched audio signal as
    inputs and computes the Mel spectrogram using Short-Time Fourier Transform
    and Mel scaling. The input should be a 1D (unbatched) or 2D (batched) tensor
    representing audio signals. The output will be a 2D or 3D tensor
    representing Mel spectrograms.

    A spectrogram is an image-like representation that shows the frequency
    spectrum of a signal over time. It uses x-axis to represent time, y-axis to
    represent frequency, and each pixel to represent intensity.
    Mel spectrograms are a special type of spectrogram that use the mel scale,
    which approximates how humans perceive sound. They are commonly used in
    speech and music processing tasks like speech recognition, speaker
    identification, and music genre classification.

    References:
    - [Spectrogram](https://en.wikipedia.org/wiki/Spectrogram),
    - [Mel scale](https://en.wikipedia.org/wiki/Mel_scale).

    Examples:

    **Unbatched audio signal**

    >>> layer = keras.layers.MelSpectrogram(num_mel_bins=64,
    ...                                     sampling_rate=8000,
    ...                                     sequence_stride=256,
    ...                                     fft_length=2048)
    >>> layer(keras.random.uniform(shape=(16000,))).shape
    (64, 63)

    **Batched audio signal**

    >>> layer = keras.layers.MelSpectrogram(num_mel_bins=80,
    ...                                     sampling_rate=8000,
    ...                                     sequence_stride=128,
    ...                                     fft_length=2048)
    >>> layer(keras.random.uniform(shape=(2, 16000))).shape
    (2, 80, 125)

    Input shape:
        1D (unbatched) or 2D (batched) tensor with shape:`(..., samples)`.

    Output shape:
        2D (unbatched) or 3D (batched) tensor with
        shape:`(..., num_mel_bins, time)`.

    Args:
        fft_length: Integer, size of the FFT window.
        sequence_stride: Integer, number of samples between successive STFT
            columns.
        sequence_length: Integer, size of the window used for applying
            `window` to each audio frame. If `None`, defaults to `fft_length`.
        window: String, name of the window function to use. Available values
            are `"hann"` and `"hamming"`. If `window` is a tensor, it will be
            used directly as the window and its length must be
            `sequence_length`. If `window` is `None`, no windowing is
            used. Defaults to `"hann"`.
        sampling_rate: Integer, sample rate of the input signal.
        num_mel_bins: Integer, number of mel bins to generate.
        min_freq: Float, minimum frequency of the mel bins.
        max_freq: Float, maximum frequency of the mel bins.
            If `None`, defaults to `sampling_rate / 2`.
        power_to_db: If True, convert the power spectrogram to decibels.
        top_db: Float, minimum negative cut-off `max(10 * log10(S)) - top_db`.
        mag_exp: Float, exponent for the magnitude spectrogram.
            1 for magnitude, 2 for power, etc. Default is 2.
        ref_power: Float, the power is scaled relative to it
            `10 * log10(S / ref_power)`.
        min_power: Float, minimum value for power and `ref_power`.
    c                    || _         || _        |xs || _        || _        || _        || _        || _        |xs t        |dz        | _        |	| _	        |
| _
        || _        || _        || _        t        | <  di | y )N    )
fft_lengthsequence_stridesequence_lengthwindowsampling_ratenum_mel_binsmin_freqintmax_freqpower_to_dbtop_dbmag_exp	min_power	ref_powersuper__init__)selfr
   r   r   r   r   r   r   r   r   r   r   r   r   kwargs	__class__s                  ^/home/dcms/DCMS/lib/python3.12/site-packages/keras/src/layers/preprocessing/mel_spectrogram.pyr   zMelSpectrogram.__init__S   s    " %..<**(  :C(9$:&"""6"    c                    | j                   dvrdn| j                   }| j                  j                  ||      }| j                  |      }| j	                  |      }| j
                  r| j                  |      }| j                  j                  j                  |dd      }| j                  j                  || j                         }|S )N)float32float64r    )dtype)
compute_dtypebackendconvert_to_tensor_spectrogram	_melscaler   _dbscalenumpyswapaxescast)r   inputsr"   outputss       r   callzMelSpectrogram.calls   s     !!)?? ## 	
 //e/D##F+..)mmG,G,,$$--gr2>,,##GT-?-?@r   c                 0   | j                   j                  j                  || j                  | j                  | j
                  | j                  d      \  }}| j                   j                  j                  | j                   j                  j                  | j                   j                  j                  |      | j                   j                  j                  |                  }| j                   j                  j                  || j                        }|S )NT)r   r   r
   r   center)r&   mathstftr   r   r
   r   r+   sqrtaddsquarepowerr   )r   r.   realimagspecs        r   r(   zMelSpectrogram._spectrogram   s    \\&&++ 00 00;; , 

d ||!!&&LL""""))$/1C1C1J1J41P

 ||!!''dll;r   c                    | j                  | j                  | j                  j                  |      d   | j                  | j
                  | j                        }| j                  j                  j                  ||d      S )Nr#   )r   num_spectrogram_binsr   lower_edge_hertzupper_edge_hertz   )axes)	linear_to_mel_weight_matrixr   r&   shaper   r   r   r+   	tensordot)r   r.   matrixs      r   r)   zMelSpectrogram._melscale   sr    11**!%!3!3F!;B!?,,!]]!]] 2 
 ||!!++FF+CCr   c                    d| j                   j                  j                  | j                   j                  j                  || j                              z  }| j                   j                  j                  | j                   j                  | j                              }|d| j                   j                  j                  | j                   j                  j                  || j                              z  z  }| j                   j                  j                  || j                   j                  j                  |      | j                  z
        }|S )Ng      $@)
r&   r+   log10maximumr   absr'   r   maxr   )r   r.   log_spec	ref_values       r   r*   zMelSpectrogram._dbscale   s   LL$$""**64>>B

 LL&&**LL**4>>:
	 	D4<<--33LL&&y$..A
 
 	
 <<%%--dll((,,X6D
 r   c                 n    t         | j                  j                  j                  d|t        z  z         z  S )ao  Converts frequencies in `frequencies_hertz` in Hertz to the
            mel scale.

        Args:
            frequencies_hertz: A tensor of frequencies in Hertz.
            name: An optional name for the operation.

        Returns:
            A tensor of the same shape and type of `frequencies_hertz`
            containing frequencies in the mel scale.
              ?)_MEL_HIGH_FREQUENCY_Qr&   r+   log_MEL_BREAK_FREQUENCY_HERTZ)r   frequencies_hertzs     r   _hertz_to_melzMelSpectrogram._hertz_to_mel   s7     %t||'9'9'='=$'AAB(
 
 	
r   c                 $     j                   j                  ||      } j                   j                  ||      } j                   j                  ||      } j                   j                  d|      }d}|dz  }	 j                   j                  j	                  ||	|      |d }
 j                   j                  j                   j                  |
      d      } j                   j                  j                   j                   j                  j	                   j                  |       j                  |      dz         dd      }t         fd j                   j                  j                  |dd	      D              \  }}}||z
  ||z
  z  }||z
  ||z
  z  } j                   j                  j                  | j                   j                  j                  ||            } j                   j                  j                  ||d
gd
d
gg      S )a
  Returns a matrix to warp linear scale spectrograms to the mel scale.

        Returns a weight matrix that can be used to re-weight a tensor
        containing `num_spectrogram_bins` linearly sampled frequency information
        from `[0, sampling_rate / 2]` into `num_mel_bins` frequency information
        from `[lower_edge_hertz, upper_edge_hertz]` on the mel scale.

        This function follows the [Hidden Markov Model Toolkit (HTK)](
        http://htk.eng.cam.ac.uk/) convention, defining the mel scale in
        terms of a frequency in hertz according to the following formula:

        ```mel(f) = 2595 * log10( 1 + f/700)```

        In the returned matrix, all the triangles (filterbanks) have a peak
        value of 1.0.

        For example, the returned matrix `A` can be used to right-multiply a
        spectrogram `S` of shape `[frames, num_spectrogram_bins]` of linear
        scale spectrum values (e.g. STFT magnitudes) to generate a
        "mel spectrogram" `M` of shape `[frames, num_mel_bins]`.

        ```
        # `S` has shape [frames, num_spectrogram_bins]
        # `M` has shape [frames, num_mel_bins]
        M = keras.ops.matmul(S, A)
        ```

        The matrix can be used with `keras.ops.tensordot` to convert an
        arbitrary rank `Tensor` of linear-scale spectral bins into the
        mel scale.

        ```
        # S has shape [..., num_spectrogram_bins].
        # M has shape [..., num_mel_bins].
        M = keras.ops.tensordot(S, A, 1)
        ```

        References:
        - [Mel scale (Wikipedia)](https://en.wikipedia.org/wiki/Mel_scale)

        Args:
            num_mel_bins: Python int. How many bands in the resulting
                mel spectrum.
            num_spectrogram_bins: An integer `Tensor`. How many bins there are
                in the source spectrogram data, which is understood to be
                `fft_size // 2 + 1`, i.e. the spectrogram only contains the
                nonredundant FFT bins.
            sampling_rate: An integer or float `Tensor`. Samples per second of
                the input signal used to create the spectrogram. Used to figure
                out the frequencies corresponding to each spectrogram bin,
                which dictates how they are mapped into the mel scale.
            lower_edge_hertz: Python float. Lower bound on the frequencies to be
                included in the mel spectrum. This corresponds to the lower
                edge of the lowest triangular band.
            upper_edge_hertz: Python float. The desired top edge of the highest
                frequency band.
            dtype: The `DType` of the result matrix. Must be a floating point
                type.

        Returns:
            A tensor of shape `[num_spectrogram_bins, num_mel_bins]`.
        g        r@          @Nr      )r   r   c              3   n   K   | ],  }j                   j                  j                  |d g       . yw)r@   N)r&   r+   reshape).0tr   r   s     r   	<genexpr>z=MelSpectrogram.linear_to_mel_weight_matrix.<locals>.<genexpr>.  s5      ;
 LL&&q1l*;<;
s   25)axisr   )r&   r-   r'   r+   linspaceexpand_dimsrS   r3   extract_sequencestuplesplitrH   minimumpad)r   r   r=   r   r>   r?   r"   zerobands_to_zeronyquist_hertzlinear_frequenciesspectrogram_bins_melband_edges_mellower_edge_mel
center_melupper_edge_mellower_slopesupper_slopesmel_weights_matrixs   ``                 r   rB   z*MelSpectrogram.linear_to_mel_weight_matrix   s   T ))-?<<99
  <<99
 ||--c59 %+!\\//88-!5

.  $||11==12A 
 **<<LL''""#34""#34q 
  = 
 6; ;
\\''--naa-H;
 6
2
N -~='
 ')==Z'

 "\\//77$,,$$,,\<H

 ||!!%%Q!Q(
 	
r   c                    t        |      dk(  r:| j                  |d   %|d   | j                  z   dz   | j                  z  g}|S d g}|S |d   | j                  |d   "|d   | j                  z   dz   | j                  z  nd g}|S )Nr@   r   )lenr   r   )r   input_shapeoutput_shapes      r   compute_output_shapez#MelSpectrogram.compute_output_shapeG  s    {q !! #1~1 !^d&:&::Q>++,L(  L(  A!! #1~1 !^d&:&::Q>++, 	L r   c                 f   t         |          }|j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  d       |S )N)r
   r   r   r   r   r   r   r   r   r   r   r   r   )r   
get_configupdater
   r   r   r   r   r   r   r   r   r   r   r   r   )r   configr   s     r   rv   zMelSpectrogram.get_config_  s    #%"oo#'#7#7#'#7#7++!%!3!3 $ 1 1 MM MM#//++<<!^^!^^	
" r   )i   i   Nhanni>     g      4@NTg      T@rU   g|=rN   )      i@  g     @_@g     @r    )__name__
__module____qualname____doc__r   r0   r(   r)   r*   rS   rB   rt   rv   __classcell__)r   s   @r   r   r   	   s}    FT #@ $D"
$  E
N0 r   r   N)keras.src.api_exportr   ,keras.src.layers.preprocessing.tf_data_layerr   rQ   rO   r   r	   r   r   <module>r      s=    - D #   +,h[ h -hr   