
    2Vh:                         d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
  ed       G d	 d
ej                               Zy)    N)backend)initializers)layers)ops)keras_export)scipyzkeras.layers.STFTSpectrogramc                   `     e Zd ZdZ	 	 	 	 	 	 	 	 	 	 d	 fd	Zd Zd Zd Zd Zd Z	 fdZ
 xZS )
STFTSpectrograma  Layer to compute the Short-Time Fourier Transform (STFT) on a 1D signal.

    A layer that computes Spectrograms of the input signal to produce
    a spectrogram. This layers utilizes Short-Time Fourier Transform (STFT) by
    The layer computes Spectrograms based on STFT by utilizing convolution
    kernels, which allows parallelization on GPUs and trainable kernels for
    fine-tuning support. This layer allows different modes of output
    (e.g., log-scaled magnitude, phase, power spectral density, etc.) and
    provides flexibility in windowing, padding, and scaling options for the
    STFT calculation.

    Examples:

    Apply it as a non-trainable preprocessing layer on 3 audio tracks of
    1 channel, 10 seconds and sampled at 16 kHz.

    >>> layer = keras.layers.STFTSpectrogram(
    ...     mode='log',
    ...     frame_length=256,
    ...     frame_step=128,   # 50% overlap
    ...     fft_length=512,
    ...     window="hann",
    ...     padding="valid",
    ...     trainable=False,  # non-trainable, preprocessing only
    ... )
    >>> layer(keras.random.uniform(shape=(3, 160000, 1))).shape
    (3, 1249, 257)

    Apply it as a trainable processing layer on 3 stereo audio tracks of
    2 channels, 10 seconds and sampled at 16 kHz. This is initialized as the
    non-trainable layer, but then can be trained jointly within a model.

    >>> layer = keras.layers.STFTSpectrogram(
    ...     mode='log',
    ...     frame_length=256,
    ...     frame_step=128,    # 50% overlap
    ...     fft_length=512,
    ...     window="hamming",  # hamming windowing function
    ...     padding="same",    # padding to preserve the time dimension
    ...     trainable=True,    # trainable, this is the default in keras
    ... )
    >>> layer(keras.random.uniform(shape=(3, 160000, 2))).shape
    (3, 1250, 514)

    Similar to the last example, but add an extra dimension so the output is
    an image to be used with image models. We apply this here on a signal of
    3 input channels to output an image tensor, hence is directly applicable
    with an image model.

    >>> layer = keras.layers.STFTSpectrogram(
    ...     mode='log',
    ...     frame_length=256,
    ...     frame_step=128,
    ...     fft_length=512,
    ...     padding="same",
    ...     expand_dims=True,  # this adds the extra dimension
    ... )
    >>> layer(keras.random.uniform(shape=(3, 160000, 3))).shape
    (3, 1250, 257, 3)

    Args:
        mode: String, the output type of the spectrogram. Can be one of
            `"log"`, `"magnitude`", `"psd"`, `"real`", `"imag`", `"angle`",
            `"stft`". Defaults to `"log`".
        frame_length: Integer, The length of each frame (window) for STFT in
            samples. Defaults to 256.
        frame_step: Integer, the step size (hop length) between
            consecutive frames. If not provided, defaults to half the
            frame_length. Defaults to `frame_length // 2`.
        fft_length: Integer, the size of frequency bins used in the Fast-Fourier
            Transform (FFT) to apply to each frame. Should be greater than or
            equal to `frame_length`.  Recommended to be a power of two. Defaults
            to the smallest power of two that is greater than or equal
            to `frame_length`.
        window: (String or array_like), the windowing function to apply to each
            frame. Can be `"hann`" (default), `"hamming`", or a custom window
            provided as an array_like.
        periodic: Boolean, if True, the window function will be treated as
            periodic. Defaults to `False`.
        scaling: String, type of scaling applied to the window. Can be
            `"density`", `"spectrum`", or None. Default is `"density`".
        padding: String, padding strategy. Can be `"valid`" or `"same`".
            Defaults to `"valid"`.
        expand_dims: Boolean, if True, will expand the output into spectrograms
            into two dimensions to be compatible with image models.
            Defaults to `False`.
        data_format: String, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, height, width, channels)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, channels, height, weight)`. Defaults to `"channels_last"`.

    Raises:
        ValueError: If an invalid value is provided for `"mode`", `"scaling`",
            `"padding`", or other input arguments.
        TypeError: If the input data type is not one of `"float16`",
            `"float32`", or `"float64`".

    Input shape:
        A 3D tensor of shape `(batch_size, time_length, input_channels)`, if
        `data_format=="channels_last"`, and of shape
        `(batch_size, input_channels, time_length)` if
        `data_format=="channels_first"`, where `time_length` is the length of
        the input signal, and `input_channels` is the number of input channels.
        The same kernels are applied to each channel independently.

    Output shape:
        If `data_format=="channels_first" and not expand_dims`, a 3D tensor:
            `(batch_size, input_channels * freq_channels, new_time_length)`
        If `data_format=="channels_last" and not expand_dims`, a 3D tensor:
            `(batch_size, new_time_length, input_channels * freq_channels)`
        If `data_format=="channels_first" and expand_dims`, a 4D tensor:
            `(batch_size, input_channels, new_time_length, freq_channels)`
        If `data_format=="channels_last" and expand_dims`, a 4D tensor:
            `(batch_size, new_time_length, freq_channels, input_channels)`

        where `new_time_length` depends on the padding, and `freq_channels` is
        the number of FFT bins `(fft_length // 2 + 1)`.
    c                 v   |||kD  s|dk  rt        d| d|       |||k  rt        d| d|       |!|| z  |k7  rt        j                  d|        g d}||vr t        ddj                  |       d	|       ||d
vrt        d|       |dvrt        d|       t	        |t
              r t        j                  j                  |d       t        | (  di | || _        || _        || _        |xs | j                  dz  | _        || _        |xs4 dt!        t#        j$                  t#        j&                  |                  z  | _        || _        || _        || _        || _        |	| _        t5        j6                  |
      | _        t:        j<                  j?                  d      | _        y )N   z_`frame_step` should be a positive integer not greater than `frame_length`. Received frame_step=z, frame_length=zI`fft_length` should be not less than `frame_length`. Received fft_length=zF`fft_length` is recommended to be a power of two. Received fft_length=)log	magnitudepsdrealimaganglestftz*Output mode is invalid, it must be one of z, z. Received: mode=)densityspectrumzQScaling is invalid, it must be `None`, 'density' or 'spectrum'. Received scaling=)validsamezDPadding is invalid, it should be 'valid', 'same'. Received: padding=      )ndim ) 
ValueErrorwarningswarnjoin
isinstancestrr   signal
get_windowsuper__init__modeframe_length
frame_step_frame_step
fft_lengthintmathceillog2_fft_lengthwindowperiodicscalingpaddingexpand_dimsr   standardize_data_formatdata_formatr   
input_spec	InputSpec)selfr&   r'   r(   r*   r0   r1   r2   r3   r4   r6   kwargs	all_modes	__class__s                _/home/dcms/DCMS/lib/python3.12/site-packages/keras/src/layers/preprocessing/stft_spectrogram.pyr%   zSTFTSpectrogram.__init__   s    !%a77Al C  ,~/  !j<&?''1l/,Q 
 !zZK'?J&NMM''1l4
 Q	y <99Y'((9$A 
 72I#I33:)= 
 ++%%,I/ 
 fc"LL##FA."6"	($%?):):a)?$% 
TYYtyy6788 	  &"::;G ++5515=    c           
         | j                   d| j                  dz  dz   f}| j                  dk7  rM| j                  d|t	        j
                  d| j                  | j                  | j                              | _	        | j                  dk7  rN| j                  d|t	        j
                  d| j                  | j                  | j                              | _
        y y )Nr   r   r   real_kernelr   )nameshapeinitializerimag_kernel)r'   r/   r&   
add_weightr   STFTr0   r2   r1   r@   rD   )r9   input_shaperB   s      r=   buildzSTFTSpectrogram.build   s    ""At'7'71'<q'@A99#"(--DKKt}}  /  D 99#"(--DKKt}}  /  D r>   c                    t        j                  |      \  }}}}d}| j                  dk(  r\| j                  rt        j                  |g d      }|S t        j
                  ||||z  |g      }t        j                  |g d      }|S | j                  rt        j                  |g d      }|S t        j
                  ||||z  |g      }|S )Nchannels_last)r   r   r   r   r   r   r   )r   r   r   r   )r   rB   r6   r4   	transposereshape)r9   outputs_channelsfreq_channelstime_seq
batch_sizes          r=   _adjust_shapeszSTFTSpectrogram._adjust_shapes   s    /2yy/A,8]H
.-->$  ++M!98D
 --;  --> 	 ++M!98D r>   c           	         | j                   dk(  rKt        j                  |      \  }}}t        j                  |g d      }t        j                  |d|dg      }n2t        j                  |      \  }}}t        j                  |dd|g      }t        j
                  |t        j                  |t        j                  |j                              | j                  | j                  | j                         }d}| j                   dk(  rMt        j                  |      \  }}}t        j                  |g d      }t        j                  |||||g      }|S t        j                  |      \  }}}t        j                  |||||g      }|S )NrK   rL   rJ   r   )r3   stridesr6   )r6   r   rB   rM   rN   convcastr   standardize_dtypedtyper3   r)   )	r9   inputskernelrP   rS   rQ   rO   rT   rR   s	            r=   _apply_convzSTFTSpectrogram._apply_conv  sX   .$'IIf$5!Ax]]695F[["h):;F$'IIf$5!Ax[["a):;F((HHVW66v||DELL$$((
 
.),7);&AxmmGY7GkkX}h?G  *-7);&A}hkkX}h?G r>   c                    |j                   }t        j                  |      dvrt        d|       d }d }d }| j                  dk7  r| j                  || j                        }| j                  dk7  r| j                  || j                        }| j                  dk(  r| j                  |      S | j                  dk(  r| j                  |      S | j                  dk(  r%| j                  t        j                  ||            S | j                  dk(  r(| j                  t        j                  ||gd            S t        j                  |      t        j                  |      z   }| j                  d	k(  rC| j                  |t        j                  |d d d d d
dd d f   ddgddgd
d
gddgg      z         S | j                  t        j                  t        j                  |t        j                                            }| j                  dk(  r|S t        j"                  t        j                  |t        j                                      S )N>   float16float32float64zUInvalid input type. Expected `float16`, `float32` or `float64`. Received: input type=r   r   r   r   r   )axisr   r   rJ   r   r   )r[   r   rZ   	TypeErrorr&   r^   r@   rD   rU   r   arctan2concatenatesquarepadsqrtmaximumepsilonr   )r9   r\   r[   real_signalimag_signalpowerlinear_stfts          r=   callzSTFTSpectrogram.call"  s   $$U+ 4
 

 338'; 
 99**643C3CDK99**643C3CDK99&&{33YY& &&{33YY'!&&s{{;'LMMYY& &&k :C  JJ{+cjj.EEE99&&''!Q"a-(Aq6Aq6Aq6Aq6*J  ))HHS[[(9:;
 99#773;;{GOO4EFGGr>   c                    | j                   dk(  r|d   }n|d   }| j                  dz  dz   }| j                  dk(  r|dz  }t        j                  j                  |||z  | j                  f| j                  | j                  | j                         }| j                   dk(  r|\  }}}n|\  }}}| j                  r| j                   dk(  r||||fS ||||fS |S )NrK   rJ   r   r   r   )rW   r3   r6   )
r6   r/   r&   r   operation_utilscompute_conv_output_shaper'   r)   r3   r4   )r9   rG   rQ   rR   rB   rT   rS   rP   s           r=   compute_output_shapez$STFTSpectrogram.compute_output_shapeT  s    ."2H"1~H((A-199QM##==H$ $$LL(( > 
 .&+#J!&+#J8?2"HmXFF"HhFFr>   c                 $   t         |          }|j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  d
       |S )N)
r&   r'   r(   r*   r0   r1   r2   r3   r6   r4   )r$   
get_configupdater&   r'   r(   r*   r0   r1   r2   r3   r6   r4   )r9   configr<   s     r=   rv   zSTFTSpectrogram.get_configo  st    #%		 $ 1 1"oo"oo++ MM<<<<#//#//	
 r>   )
r      NNhannFr   r   FN)__name__
__module____qualname____doc__r%   rH   rU   r^   rp   rt   rv   __classcell__)r<   s   @r=   r
   r
      sW    vt M>^(2@0Hd6 r>   r
   )r,   r   	keras.srcr   r   r   r   keras.src.api_exportr   keras.src.utils.module_utilsr   Layerr
   r   r>   r=   <module>r      sE       "   - . ,-rfll r .rr>   