
    2VhZ                     |   d dl Z d dlZd dlZd dlmc mZ d dlmZ	 d dlm
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d
 Zd Zd Zd Zd Zd Zd Zd Zd?dZd Z d Z!d@dZ"d Z#dAdZ$d Z%d Z&dBdZ'd Z(dCdZ)dBdZ*dDdZ+d Z,d?d Z-d! Z.dDd"Z/dDd#Z0dDd$Z1	 	 dEd%Z2	 	 dFd&Z3	 	 	 dGd'Z4	 dHd(Z5	 	 dId)Z6	 	 	 	 dJd*Z7	 	 	 	 dJd+Z8	 	 	 	 dJd,Z9	 	 	 	 	 dKd-Z:dLd.Z;dLd/Z<dMd0Z=dMd1Z>dNd2Z?dOd3Z@	 dPd4ZAdQd5ZB	 	 dRd6ZC	 	 	 dSd7ZD	 	 	 	 	 dTd8ZEd9 ZFdNd:ZGd; ZHd< ZI	 	 	 	 dUd=ZJ	 	 	 	 	 	 dVd>ZKy)W    N)lax)nn)splash_attention_kernel)splash_attention_mask)backend)+compute_conv_transpose_padding_args_for_jax)cast)convert_to_tensorc                 B    t        |       } t        j                  |       S N)r
   jnnreluxs    H/home/dcms/DCMS/lib/python3.12/site-packages/keras/src/backend/jax/nn.pyr   r          !A88A;    c                 B    t        |       } t        j                  |       S r   )r
   r   relu6r   s    r   r   r      s    !A99Q<r   c                 B    t        |       } t        j                  |       S r   )r
   r   sigmoidr   s    r   r   r   "   s    !A;;q>r   c                 B    t        |       } t        j                  |       S r   )r
   r   sparse_sigmoidr   s    r   r   r   '   s    !Aa  r   c                 B    t        |       } t        j                  |       S r   )r
   r   tanhr   s    r   r   r   ,   r   r   c                 H    t        |       } | t        j                  |       z
  S r   )r
   jnpr   r   s    r   tanh_shrinkr   1   s    !Asxx{?r   c                 B    t        |       } t        j                  |       S r   )r
   r   softplusr   s    r   r    r    6   s    !A<<?r   c                 B    t        |       } t        j                  |       S r   )r
   r   	soft_signr   s    r   softsignr#   ;       !A==r   c           	          t        |       } t        j                  | |kD  | |z
  t        j                  | | k  | |z   d            S N        r
   r   wherer   	thresholds     r   soft_shrinkr,   @   sE    !A99	I	I		!yj.!i-5 r   c                 B    t        |       } t        j                  |       S r   )r
   r   sparse_plusr   s    r   r.   r.   I       !A??1r   c                 B    t        |       } t        j                  |       S r   )r
   r   silur   s    r   r1   r1   N   r   r   c                 F    t        |       } t        j                  | |      S )N)b)r
   r   
squareplus)r   r3   s     r   r4   r4   S   s    !A>>!q!!r   c                 B    t        |       } t        j                  |       S r   )r
   r   log_sigmoidr   s    r   r6   r6   X   r/   r   c                 F    t        |       } t        j                  | |      S )N)negative_slope)r
   r   
leaky_relu)r   r8   s     r   r9   r9   ]   s    !A>>!N;;r   c                 B    t        |       } t        j                  |       S r   )r
   r   hard_sigmoidr   s    r   r;   r;   b   s    !AAr   c                 B    t        |       } t        j                  |       S r   )r
   r   	hard_silur   s    r   r=   r=   g   r$   r   c                 F    t        |       } t        j                  | |      S N)alpha)r
   r   elur   r@   s     r   rA   rA   l   s    !A771E""r   c                 B    t        |       } t        j                  |       S r   )r
   r   selur   s    r   rD   rD   q   r   r   c                 D    t        |       } t        j                  | |      S r   )r
   r   gelu)r   approximates     r   rF   rF   v   s    !A88A{##r   c                 F    t        |       } t        j                  | |      S r?   )r
   r   celurB   s     r   rI   rI   {   s    !A88AU##r   c                 F    t        |       } t        j                  | |      S Naxis)r
   r   glur   rM   s     r   rN   rN      s    !A7714  r   c                 B    t        |       } t        j                  |       S r   )r
   r   	hard_tanhr   s    r   rQ   rQ      r$   r   c                 r    t        |       } t        j                  t        j                  |       |kD  | d      S r&   )r
   r   r)   absr*   s     r   hard_shrinkrT      s,    !A99SWWQZ)+Q44r   c                 L    t        |       } t        j                  | |kD  | |      S r   r(   )r   r+   default_values      r   r+   r+      s#    !A99Q]A}55r   c                 F    t        |       } t        j                  | |      S rK   )r
   r   softmaxrO   s     r   rX   rX      s    !A;;qt$$r   c                 F    t        |       } t        j                  | |      S rK   )r
   r   log_softmaxrO   s     r   rZ   rZ      s    !A??14((r   c                    t        |       } dt        j                  | dz  |      z  }t        j                  ||      }t        j                  d| j
                  |   dz         }dg| j                  z  }d||<   |j                  |      }||dz
  |z  z
  dkD  }t        j                  ||d      }t        j                  ||d      }t        j                  ||d      dz
  |z  }	t        j                  | |	z
  d      }
|
S )	N      rL      r   TrM   keepdimsr'   )r
   r   sortcumsumarangeshapendimreshapesumr)   maximum)logitsrM   logits_sortedlogits_cumsumrr_shapesupportklogits_cumsum_safetauoutputs              r   	sparsemaxrs      s    v&F388FTM==MJJ}48M

1fll4(1,-AcFKKGGDM			'A}q0A559GdT2A7M3?77%D4@1D
IC[[#s+FMr   c                 f    t        | t              r| f|z  n| } |s| S |dk(  r
d| z   dz   } | S d| z   } | S )Nchannels_lastr]   )r]   r]   )
isinstanceint)r   num_spatial_dimsdata_formatinclude_batch_and_channelss       r   _convert_to_spatial_operandr|      sP     $.a#51A%o%1HtO H !OHr   c                 |    |dvrt        d| d      |j                         }t        j                  | |||||      S )aC  Helper function to define pooling functions.

    Args:
        inputs: input data of shape `N+2`.
        initial_value: the initial value for the reduction.
        reduce_fn: a reduce function of the form `(T, T) -> T`.
        pool_size: a sequence of `N` integers, representing the window size to
            reduce over.
        strides: a sequence of `N` integers, representing the inter-window
            strides (default: `(1, ..., 1)`).
        padding: either the string `same` or `valid`.

    Returns:
        The output of the reduction for each window slice.
    )samevalidzInvalid padding 'z', must be 'same' or 'valid'.)
ValueErrorupperr   reduce_window)inputsinitial_value	reduce_fn	pool_sizestridespaddings         r   _poolr      sY    . ''y(EF
 	
 mmoG r   c                     t        j                  |      }| j                  dz
  }t        |||      }||n|}t        |||      }t	        | t
        j                   t        j                  |||      S )N   )	r   standardize_data_formatre   r|   r   r   infr   max)r   r   r   r   rz   ry   s         r   max_poolr      st     11+>K{{Q+#[I #?iG)!;G #''377IwHHr   c                    t        j                  |      }| j                  dz
  }t        |||      }||n|}t        |||      }t	        | dt
        j                  |||      }|dk(  r|t        j                  |      z  S t        | j                  |      D cg c]  \  }}|dk7  r|nd }	}}t	        t        j                  |	| j                        dt
        j                  |||      }
||
z  S c c}}w )Nr   r'   r   r]   )r   r   re   r|   r   r   addmathprodziprd   r   onesdtype)r   r   r   r   rz   ry   pooledar3   rd   window_countss              r   average_poolr      s    11+>K{{Q+#[I #?iG)!;G 63GWEF'		),,, 036<</K
%+a!q&Qa
 
 HHUFLL)GG
 %%
s   C0c                 N   | dz   }|dk(  r#t        t        d|dz
              }d|dz
  f|z   }nt        t        d|            }d|z   }|r#|dz
  |dz
  ft        t        |dz
              z   }n"|dz
  |dz
  ft        t        |dz
              z   }t        j                  |||      S )z9Create a `lax.ConvDimensionNumbers` for the given inputs.r   ru   r]   r   r   r]   )lhs_specrhs_specout_spec)tupleranger   ConvDimensionNumbers)ry   rz   	transposenum_dimsspatial_dims	inputs_dn	kernel_dns          r   &_convert_to_lax_conv_dimension_numbersr   "  s      !#Ho%U1hl341%4	U1h/0\)	\8a<05x!|9L3MM	\8a<05x!|9L3MM	##Y r   c           	         t        j                  |      }| j                  dz
  }t        ||d      }t	        |||d      }t	        |||d      }|dk(  r| j
                  d   }n| j
                  d   }|j
                  d   }	||	z  d	kD  rt        d
| d|	 d      ||	z  }
t        |      }t        | |j                        } t        j                  j                  | ||||||
      S )Nr   Fr   r{   ru   r^   r]   r   zgThe number of input channels must be evenly divisible by kernel's in_channels. Received input channels z and kernel in_channels z. r   rhs_dilationdimension_numbersfeature_group_count)r   r   re   r   r|   rd   r   r
   r   jaxr   conv_general_dilated)r   kernelr   r   rz   dilation_ratery   r   channelskernel_in_channelsr   s              r   convr   ;  s+    11+>K{{Q>
 *#(	G 0#(	M o%<<#<<?b)$$q(==EJ G""4!5R9
 	

 #&88v&FvV\\:F77''"+/ (  r   c           	         t        j                  |      }| j                  dz
  }t        ||d      }t	        |||d      }t	        |||d      }|dk(  r| j
                  d   n| j
                  d   }t        j                  ||j
                  d d d||j
                  d   z  fz         }t        j                  j                  | ||||||	      S )
Nr   Fr   r   ru   r^   r]   r   r   )r   r   re   r   r|   rd   r   rf   r   r   r   )	r   r   r   r   rz   r   ry   r   r   s	            r   depthwise_convr   o  s     11+>K{{Q>
 *#(	G 0#(	M (?:RQ  [[SbQ 3fll26F FGGF 77''"+/ (  r   c                 n    t        j                  |      }t        | |||||      }t        ||dd||      S )Nr]   r   )r   r   rz   r   )r   r   r   r   )r   depthwise_kernelpointwise_kernelr   r   rz   r   depthwise_conv_outputs           r   separable_convr     sR     11+>K* # r   c           	      8   t        j                  |      }| j                  dz
  }t        | j                  |j                  ||||      }t        ||d      }	t        |||d      }t        |||d      }t        j                  j                  | |||||	d      S )Nr   )input_shapekernel_shaper   r   output_paddingr   Fr   r   T)r   r   r   transpose_kernel)
r   r   re   r   rd   r   r|   r   r   conv_transpose)
r   r   r   r   r   rz   r   ry   padding_valuesr   s
             r   r   r     s     11+>K{{Q@LL\\%#N ?
 *#(	G 0#(	M 77!!"+ "  r   c                 ,   t        |       } |rd|dk  r|t        | j                        z   dz   }|d}t        j                  t        j
                  |       d      j                  |      }|j                  d   }| j                  D cg c]  }t        j                  |       }}t        j                  |ddi}|j                  |t        j                  | d             |D 	cg c]#  }	|	j                  |d      j                  d      % }}	t        j                  |d      }t        | j                        }
|
j                  ||       t        |
      }
t        j                   ||f|
dd	      S t#        j$                  | |||
      S c c}w c c}	w )Nr   r]   float32indexingijint32rL   Trd   indices_sortedunique_indicesrM   r   )r
   lenrd   r   greater_equalravelastyperc   meshgridinsertrh   rf   concatenatelistr   
jax_sparseBCOOr   one_hot)r   num_classesrM   r   sparsevaluesvalues_countdimindicesr   rd   s              r   r   r     sW   !A!8#agg,&*D=E ""399Q<3::5A||A./gg6s3::c?66,,7$7tS[[A./GNO!199\1-44W=OO//'2QWWT;'eW	
 	
 ;;q+D>> 7 Ps   F%(Fc                    t        |       } t        | j                        dkD  rdnd}|rt        | ||d|      }t	        j
                  ||f      }t	        j                  |      }t        j                  |j                  d      j                  |      }t	        j                  ||j                  f|j                  dd      S t        j                  t        t        | d      |||      |	      S )
Nr]   r   r   )rM   r   r   axesTr   r   rL   )r
   r   rd   r   r   bcoo_reduce_sumbcoo_sum_duplicatesr   r   datar   r   r   r   r	   )r   r   rM   r   r   reduction_axisresultr   s           r   	multi_hotr     s    !Aagg,*QN{WV
 ++F.9JK//7""6;;299%@V^^$,,	
 	
 77Q +DF r   c                    t        j                  |       } t        j                  |      }| j                  |j                  k7  r%t        d| j                   d|j                         t	        | j                        dk  r%t        d| j                   d|j                         |r"t
        j                  j                  ||      }nn|t        j                  ||d      z  }t        j                  |t        j                         dt        j                         z
        }t        j                  |      }t        j                  | |z  |       S )	NQArguments `target` and `output` must have the same shape. Received: target.shape=, output.shape=r]   zPArguments `target` and `output` must be at least rank 1. Received: target.shape=rL   Tr`         ?)r   arrayrd   r   r   r   r   rZ   rg   clipr   epsilonlogtargetrr   from_logitsrM   log_probs        r   categorical_crossentropyr     s   YYvFYYvF||v||#"LL>H
 	

 6<<1"LL>H
 	
 66%%f4%8#''&$>>&'//"3S7??;L5LM776?GGFX%D111r   c                    t        j                  | d      } t        j                  |      }t        | j                        t        |j                        k(  r)| j                  d   dk(  rt        j                  | d      } t        |j                        dk  rt        d|j                         | j                  |j                  d d k7  r%t        d| j                   d|j                         |r"t        j                  j                  ||      }nn|t        j                  ||d	
      z  }t        j                  |t        j                         dt        j                         z
        }t        j                  |      }t        j                  | |j                  |   |      } t        j                  | |z  |       S )Nr   r   r^   r]   rL   zBArgument `output` must be at least rank 1. Received: output.shape=zcArguments `target` and `output` must have the same shape up until the last dimension: target.shape=r   Tr   r   )r   r   r   rd   squeezer   r   r   rZ   rg   r   r   r   r   r   r   r   s        r   sparse_categorical_crossentropyr   5  sj   YYvW-FYYvF
6<<C--&,,r2Ba2GV"-
6<<1"LL>+
 	

 ||v||CR(("LL>H
 	

 66%%f4%8#''&$>>&'//"3S7??;L5LM776?[[d!3$?FGGFX%D111r   c                 j   t        j                  |       } t        j                  |      }| j                  |j                  k7  r%t        d| j                   d|j                         |rPt        j
                  j                  |      }t        j
                  j                  |       }d| z  |z  d| z
  |z  z
  S t        j                  |t        j                         dt        j                         z
        }| t        j                  |      z  }|d| z
  t        j                  d|z
        z  z  }| S )Nr   r   r\   r   )r   r   rd   r   r   r   r6   r   r   r   r   )r   rr   r   
log_logitslog_neg_logitsbces         r   binary_crossentropyr   Q  s   YYvFYYvF||v||#"LL>H
 	
 VV''/
++VG4f}z)S6\^,KKKXXfgoo/w7H1HIF
3776?
"CC&LCGGC&L111C4Kr   c                 .   |rt        d      d}t        j                  | j                        }|dv rd}t	        | d      } t        j                  | |d      }t        j                  | |d      }|s,t        j                  ||      }t        j                  ||      }|rt        j                  |t        j                  t
        j                        j                  t        j                  t
        j                        j                        }t        j                  |t        j                  t
        j                        j                  t        j                  t
        j                        j                        }t	        ||      }t	        ||      }||fS )Nz5Argument synchronized=True is not supported with JAX.F)float16bfloat16Tr   r   r_   )NotImplementedErrorr   standardize_dtyper   r	   r   meanvarr   r   finfor  minr   )r   r   r`   synchronized	need_cast	ori_dtyper  variances           r   momentsr  g  s0   !C
 	
 I))!''2I++	I88Atd+Dwwqtd3H{{4&;;x.xx#))CKK(,,cii.D.H.H
 88cii,00#))CKK2H2L2L
 D)$),>r   c                    dgt        | j                        z  }|j                  d   ||<   t        j                  ||      }t        j                  ||      }t        j
                  j                  ||z         }|t        j                  ||      }||z  }| |z  }	|t        j                  ||      }|	|z   }	t        j                  | |z  |	      S )Nr]   r   )r   rd   r   rf   r   r   rsqrtr   )
r   r  r  rM   offsetscaler   rd   invress
             r   batch_normalizationr    s     C#agg,E**Q-E$K;;tU#D{{8U+H
''--7*
+CE5)Ek%#+CVU+Fl771s7C  r   c                    t        | d      } t        |      }t        |d      }t        |d      }|j                  \  }}| j                  \  }dt        j                  |j                  d      }t        ||      }d }	 |	||      }
 |	||      }|
j                  |j                        }
|j                  |j                        }t        j                  |      }|t        j                  |
d      j                  t        j                        z
  }| d d d df   | d d dd f   k(  j                  t        j                        t        j                  d	      |d d d d ||dz   f   }t        j                  |d
      }t        j                   j#                  | |      }t        j$                  d||      }t        j                  |d
      }t        j&                  |dz   f|j                        z  }|j(                  d d df   j+                  d      }t        j&                  |f|j                        z  }d fd}|||j                  d      f}t        j,                  j/                  |||f|      \  }\  }} |d   |d         }|j(                  d   j+                  |      }t        j                   j#                  ||dz         }t        j$                  d||       }|S )Nr   r   g     jr   c                     t        j                  |      j                  d| j                  z  |fz         }t        j                  | d      } || k  }t        j
                  |      S )Nrv   r^   rL   )r   rc   rf   re   expand_dimslogical_not)lengths
max_lengthr   
elem_valids       r   _lengths_to_paddingsz&ctc_loss.<locals>._lengths_to_paddings  sY    **Z(007<<:-/
 //'3w&
z**r   r]   rL   r^   )r   r   r   r]   r   r   )r   zbtk,bnk->btnr   r'   c           	          t        j                  | d d d df   t        j                  | d d dd f   |      gd      S )Nr]   r^   rL   )r   r   	logaddexp)phiadded_scores     r   update_phi_scorez"ctc_loss.<locals>.update_phi_score  s>    BQBZs1ab5z;?@r
 	
r   c                 (   | \  }}|} ||z  z         }|\  }}}t        j                  |d d d df   |z   ||z         }||z   }	 |	||z   dz
  z  z         }	|j                  
df      }||z  d|z
  |z  z   }||z  d|z
  |	z  z   }	|	|f|	|ffS )Nr^   r   r]   )r   r   rf   )prevr   prev_phi	prev_emitprev_phi_origlogprob_emitlogprob_phipad	next_emitnext_phi
batch_sizelog_epsilonrepeatr#  s             r   	loop_bodyzctc_loss.<locals>.loop_body  s    ") #Hi+:N.NO)*&k3 MMQV|+Y-E
	 k)#i+-sV|0LL
 kk:q/*)OsSyI&==	&#)x)??)$x&;;;r   r]   r   zbn,bn->b)r
   rd   r   result_typer   r	   r   r   rZ   r   rg   r   r   r+  r   r   r   r   einsumr   atsetr   scan)r   rr   target_lengthoutput_length
mask_indexmax_input_lengthr   max_label_lengthr   r  target_paddingsoutput_paddingslogprobslabel_lengthslogprobs_phi_one_hotlogprobs_emitlogalpha_phi_initlogalpha_emit_initr1  xs_logalpha_philogalpha_emitlogalpha_phi_lastper_seq_lossr.  r/  r0  r#  s                             @@@@r   ctc_lossrL    s    vW5Fv&F%mW=M%mW=M06-J +#)<< J K i8E&% F+ +=:JKO*=:JKO%,,V\\:O%,,V\\:Ov&H$swwQ'G'N'N		( M
 QVnq!"u-55ckkBFWWV-.FAq*zA~"==>L==y9Lvv~~f+~>HJJ~xBMMM-;M 	*.236<<H
	  *,,QT266s;*./v||D
	 

<2 '@'@'H	IB'*ww||%'9:B($A$m
 )b)9=;LM??2&**+<=L vv~~m9IA9M~NHJJz+<hGGLr   c                    t        |       } t        |d      }| j                  \  }}}||dz
  }t        j                  | d      }t        j                  | d      }t        j
                  |      d d d f   }	|	|d d d f   k\  }	t        j                  |	||      }t        j                  |	d|      }|rD|d d dd f   |d d d df   k(  }
t        j                  |
d      }
t        j                  |
||      }||k(  }t        j                  |d|      }t        j                  t        j
                  |      d      }t        j                  ||df      }t        j                  |||      }t        j                  |d      }t        j                  ||d      }t        j                  |d      d d d f    }t        j                  |d      }||fS )	Nr   r   r]   r^   rL   r'   )r  r2  r   )r
   rd   r   argmaxr   rc   r)   r+  r  tileargsorttake_along_axisrg   )r   sequence_lengthsmerge_repeatedr:  r.  r  r   r   scoresseqlen_maskrepeat_maskinvalid_maskorders                r   _ctc_greedy_decoderY    s    v&F()9I*0,,'J
K 1_
jjb)GWWV"%F**Z(q1K!1!T'!::KiiZ9GYY{C0Faen3B37ggk+;<))KW= j(Liib'2G OOCJJz2;EHHUZO,EIIlJ6EKKB'E!!'5r:Gggf1%ag..FoogA.GF?r   c                    t        |       } t        |      }| j                  \  }}t        j                  |       } t	        j
                  |      d d d f   |d d d f   k\  }dz
  t	        j                  | d      } z
  dz
  dt	        j                  |dz  |ft        j                        }t        j                        }	t	        j                  | d d df   d      d d |	 d f   }
t	        j                  |
k(  |
      }|j                  d d d |	df   j                  |      }t	        j                  |dz  ft        j                   | j                         j                  d d d |	f   j                  t	        j"                  | d d df   |
d            }|d d d d df   k(  }fdd fd	fd
fdfd} t%        j&                  |      |||| |      \  }}t	        j                  |k(  |z
  dz
        }t	        j(                  |g d      }||fS )Nr]   r   rL   r^   r   r   c                    t        j                  | d      } t        j                  |      }t        j                  |      }t        j                  | 
k(  d      }t        j                  dz  z        }| ||dz
  f   }t        j                  |dk(  
|      }t        j                        j
                     j                  
      }t        j                  |dz        }|}|
k(  }| ||k(  z  }	t        j                  |	
|      }| j
                  ||f   j                  |      } t        j                  |dz        }||z   }| ||fS )Nr   rL   r]   r   )r   r0  rN  rc   r)   r5  r6  rO  )pathsrT  maskedr   path_tail_indexpaths_arange
path_tailsclassesprev_maskedmasked_repeat_pad
beam_widthr:  r   s             r   _extend_pathsz._ctc_beam_search_decode.<locals>._extend_pathsX  s<   

5+A6FK0FK0**Ud];zz!j.;">?<1)<<=
YY!3T:F
**[),,Z8<<TB((7A
N3D$
g(=>))M4967;;GDHHQJ'!ff$$r   c                     t        j                  |      }t        j                  ||z
        }t        j                  |      j                  |    j                  |      }t        j                  |      |z   }|S r   )r   r   exp
zeros_liker5  r   r   )unique_inverserT  
scores_max
scores_exps       r   _merge_scoresz._ctc_beam_search_decode.<locals>._merge_scoresq  s]    WWV_
WWVj01
'**>:>>zJ:-r   c                    t        j                  | ddz  
z  d	      \  } }t        |j                        dk\  rt        j                  |d      }t        j
                  |t         j                   |      }t        j
                  ||t         j                         } ||      } ||      }t        j                  ||      }t        j                  |      
 d  }| |   } ||   }||   }t        j                  | d      } t        j                  ||g      }t        j                  t        j                  
t              t        j                  
t              g      }| ||fS )NTr   r   return_inversesizerM   
fill_valuer]   rL   )r   r]   )r   uniquer   rd   r   r)   r   r   rP  rO  r   zerosboolr   )r\  rT  r]  rj  emit_scoresmask_scorestotal_scorestop_indicesrm  rd  re  r   s           r   _prune_pathsz-_ctc_beam_search_decode.<locals>._prune_pathsx  s>    #

[:-!
~ ~##$) [[a@Nii&9ii9#NK@#NK@}}[+>kk,/=k"!+.!+.'+{!;<YYz4(#((:t*DE
 ff$$r   c                 H     | |||      \  } }} | ||      \  } }}| ||fS r    )r\  rT  r]  r   rf  rz  s       r   _decode_stepz-_ctc_beam_search_decode.<locals>._decode_step  s<     -eVVQ Gvv ,UFF Cvvff$$r   c           	      h    | \  }}}|\  }}t        j                  |d ||||      \  }}}|||fd fS )Nc                     | ||fS r   r|  )r\  rT  r]  r   s       r   <lambda>z8_ctc_beam_search_decode.<locals>._step.<locals>.<lambda>  s    eVV-D r   )r   cond)r%  r   r\  rT  r]  rU  r}  s         r   _stepz&_ctc_beam_search_decode.<locals>._step  sV     $vv; #D!
vv vv&,,r   c                 f   t        j                  | ||f|dd  |dd  f      \  \  }}}}t        j                  |ddz  z  d      \  }}	t	        |	j
                        dk\  rt        j                  |	d      }	 |	|      }t        j                  |       d  d d d   }
||
   }||
   }||fS )Nr]   Tr   r   ro  rL   r^   )r   r7  r   rs  r   rd   r   rP  )
init_pathsinit_scoresinit_maskedr   rU  r\  rT  r]  rG  rj  ry  rm  rd  r  re  r   	top_pathss              r   _decode_batchz._ctc_beam_search_decode.<locals>._decode_batch  s     &)XXk2ABZQR)&
" !$

[:-!
~ ~##$) [[a@N~v6kk&)9*+6tt<k"$f}r   r  )r
   rd   r   rZ   r   rc   flipfullr   builtinsr  rP  r)   r5  r6  r   r   rQ  r   vmapr   )r   rR  re  r  r:  r.  max_seq_lenrU  r  num_init_pathsmax_classesinit_classesr  r  r  r\  rT  r}  rf  rm  rd  rz  r  r   s     ```            @@@@@@@r   _ctc_beam_search_decoder  .  s*    v&F()9:+1<<(J[__V$F**[)$'26Fq$w6OOK 1_
 XXf1%Fz)A-JD	Q^[14syyJ \\+z:N++fQTl3A7G4GHK99[J6kJLq/>/14599,GJ 	*a*n-xv||L	A	 	S  1{C	D 
 Q1W%-K%2%@%
-  4 ,CHH]+KfkME6
 IIetmT;+>+BCEMM%+E&=r   c                     t        |       } t        j                  | j                  d      }t	        | |      } |dk(  rt        | |||      S |dk(  rt        | ||||      S t        d| d      )Nr   greedy)rS  r:  beam_search)re  r  r:  zInvalid strategy z2. Supported values are 'greedy' and 'beam_search'.)r
   r   r3  r   r	   rY  r  r   )r   rR  strategyre  r  rS  r:  r   s           r   
ctc_decoder    s     v&Fi8E&% F8!)!	
 	
 
]	"&!!
 	
 z ** *
 	
r   c                 j   | j                   |j                   k7  r&t        d| j                    d|j                    d      t        ||j                        }t	        j
                  t	        j                  | |z
              }dt	        j                  |      z  dt	        j                  |      z  z
  }|S )NzInput shapes z and z" must match for PSNR calculation. r      
   )rd   r   r
   r   r   r  squarelog10)x1x2max_valmsepsnrs        r   r  r    s    	xx288BHH:U288* 5+ +
 	

  rxx8G
((3::b2g&
'C		'""R#))C.%88DKr   c                 p   	 ddl m} ddl m} ddl m} ddl m} ddl m}	 ddlm}
 t        j                         d   j                  d
k(  ry	  |       } |d      st        d       |	| |||dd |d              || | |d      ||dud	       y# t        $ r |rt        d      Y y	w xY w#  |r Y y	xY w)z+Verify the availability of flash attention.r   )_normalize_layout)check_compute_capability)check_cudnn_version)check_is_flash_attention)check_layout)dot_product_attentionFlash attention is not supported in your current JAX version. Please update it by following the official guide: https://jax.readthedocs.io/en/latest/installation.htmlFtpuTz8.0z#Require at least Ampere arch to runNBTNH)q_seqlen	kv_seqlenlayout)is_training)(jax._src.cudnn.fused_attention_stablehlor  r  r  r  r  jax.nnr  ImportErrorr   devicesplatformRuntimeError)querykeyvaluebiasraise_errorr  r  r  r  r  r  cudnn_versions               r   _can_use_flash_attentionr    s    N	
 	Q	
 	JI {{}Q  E) ,-'.DEE$V,	
 	!f%	
 I  I 
 Js   $B AB. B+*B+.B5c                    ||s| S t        j                  | d      }|t        j                  ||      }|rm| j                  d   | j                  d   }}t        j                  t        j
                  ||fd            }|d d d d d d f   }t        j                  ||      }t        j                  dt        j                  | j                        j                  z  | j                        }t        j                  || |      }|S )Nru  r   r      gffffff)r   	ones_likelogical_andrd   trilr   asarrayr  r   r   r)   )ri   mask	is_causalcombined_maskTSlarge_negative_numberpadded_logitss           r   _apply_masksr  9  s    |IMM&7Mt<||AQ1xx!Qv67D$1$%t<KKsyy&***&,, IImV5JKMr   c                    t        j                  | j                  t         j                        }t        j                  d| ||      }|t        j
                  ||j                        z  }|||z   j                  |j                        }t        |||      }	|	j                  t         j                        }	t        j                  j                  |	d      j                  |j                        }
t        j                  d|
|      S )NzBTNH,BSNH->BNTS)preferred_element_typer   r^   rL   zBNTS,BSNH->BTNH)r   promote_typesr   r   r4  r   r   r  r   r   rX   )r  r  r  r  r  r  r  logits_dtyperi   r  probss              r   _dot_product_attention_corer  N  s     $$U[[#++>LZZ5#lF ciiV\\22F4-''5 y9M "((5MFFNN=rN299#))DE::'66r   c                    |0| j                   d   |j                  j                   d   k(  sJ d       |t        j                  |      }n2t        j                  | j                   d   | j                   d   f      }t        j
                  |f| j                   d   z        }	t        j                  |	|||      }
 t        j                  |
      | |||      S )	Nr   r]   zESharding along sequence dimension not allowed in TPU kernel attention)r   )rd   )masks)r  head_shardsq_seq_shardsattn_logits_soft_cap)segment_ids)
rd   qr   	NumpyMask
CausalMaskMultiHeadMaskr   make_splash_mhar   r  )r  r  r  decoder_segment_idscustom_maskr  r  r  r  multi_head_masksplash_kernels              r   wrap_flash_attentionr  b  s     &{{1~!4!6!6!<!<Q!?? 	
'	
?
 $..[A$//;;q>5;;q>2

 ,99gA&O ,;;!1	M #388M"sE': r   c	                 J
  ()* t        |       } t        |      }t        |      }t        | j                        dk7  s0t        |j                        dk7  st        |j                        dk7  r3t        d| j                   d|j                   d|j                   d      t	        j
                         d   j                  }	|	dk(  }
d}d}|
rh	 dd	lm} dd
lm	}  |       }|rRt        ||      rF|j                  }d|j                  v r,|j                  j                  d      }|j                  |   }d}t!        d | ||fD              }|9|
rd}nK|r#t        t	        j
                               dk  rd}n&t#        | |||      }n|du r|
s	 t#        | |||d       |
r|rt'        j(                  | d      }t'        j(                  |d      }t'        j(                  |d      }|j                  \  }}}}|||t+        j,                  |      z  z  }t'        j.                  ||gt&        j0                        }t3        j4                  ||      }d}||j6                  t&        j8                  k7  r|j;                  d      n|}|j<                  dk(  r|j                  d   |k(  r|d   }n&|j<                  dk(  r|j                  d   |k(  r|d   }|rR|Pt'        j>                  t'        j@                  ||ft&        j8                              }t'        jB                  ||      }|<|r:t'        j>                  t'        j@                  ||ft&        j8                              }	 tE        ||||||||      }t'        j(                  |d      S tG        t        jH                  d      r1	 t        jH                  jK                  | |||||||rd      S d      S |rtM        d      | j                  } |j                  \  }!}!)}"|dt'        j,                  |"      z  n|}| j                  \  }#}$*}"*)z  (t'        jN                  | |#|$)(|"f      } ()*fd}% |%|      } |%|      }t	        jP                  tR        d d!      }& |&| ||||||      }'t'        jN                  |'|       S # t        t        t        f$ r d}d}Y w xY w# t$        $ r d}Y ;w xY w# t$        $ r d}Y Xw xY w# t$        $ r- |r)t        jH                  jK                  | ||||||d      cY S  w xY w)"a  Computes dot-product attention given query, key, and value.

    This is the core computation of attention that is used in transformers.
    For TPU platforms, flash attention optimizations are automatically applied
    when possible, and sharding parameters are inferred from the layout map
    in the current distribution context.

    Args:
        query: Queries with shape `[batch, time, heads,
            depth_k]`.
        key: Keys with shape `[batch, time, heads,
            depth_k]`.
        value: Values with shape `[batch, time, heads,
            depth_v]`.
        bias: Optional bias with shape broadcastable to
            `[batch, heads, dest_time, source_time]`.
        mask: Optional mask with shape broadcastable to
            `[batch, heads, dest_time, source_time]`.
        scale: Float. Optional scale that is applied to the attention
            computation.
        is_causal: Boolean. Specifying whether causal masking is applied.
        flash_attention: Boolean. Whether to use flash attention optimization
            for increased performance. Default to None, which means it will
            be auto-determined based on the platform, input shapes and
            compatibility.
        attn_logits_soft_cap: Float. Optional float to softly cap attention
            logits to avoid numerical stability issues. Applied as:
            `logits = logits / (1.0 + abs(logits) / attn_logits_soft_cap)`.

    Returns:
        JAX Array of shape `[batch, time, heads, depth_v]`.
       zG`dot_product_attention` only supports 4D inputs. Received: query.shape=z, key.shape=z, value.shape=.r   r  r]   )ModelParallel)distributionmodelc              3   f   K   | ])  }t        |d       xr |j                  j                    + yw)shardingN)hasattrr  is_fully_replicated).0ts     r   	<genexpr>z(dot_product_attention.<locals>.<genexpr>  s4      # 	:Eqzz'E'E#EE#s   /1NTF)r  )r   r   r]   r  r   r   )r  kvru  r  r  )r  r  r  r  r  r  cudnnxla)r  r  r  r  implementationr  r   c           	          | e| j                   \  }}}}|dk(  r-t        j                  | d d d d d d d d d f   ||||f      } | S |k(  sJ t        j                  | |||f      } | S )Nr]   )rd   r   broadcast_torf   )r  tBtNtTtSGKNs        r   _reshape_to_groupedz2dot_product_attention.<locals>._reshape_to_groupedh  s    =WWNBBQw$$Qq!T1a'7%82r1b":MN  QwwKKB1b"#56r   )r  NNr   r   NN)in_axesout_axes)*r
   r   rd   r   r   r  r  'keras.src.distribution.distribution_libr  r  rw   device_mesh
axis_namesindexr  AttributeErroranyr  	Exceptionr   r   r   sqrtrt  r   r   
SegmentIdsr   bool_r   re   r  r   r  r  r  r   r  r  rf   r  r  )+r  r  r  r  r  r  r  flash_attentionr  r  is_tpur  r  r  get_distdistmeshmodel_dim_indexpartially_sharded_inputsquery_tpu_layoutkey_tpu_layoutvalue_tpu_layoutbs	num_headsq_lenhead_dimr  r  r  	mask_boolcausal_maskrr   output_shaperG  HBr  r  
vmapped_fnencodedr  r  r  s+                                           @@@r   r  r    s6   V e$E
C
 Ce$E
5;;1CII! 3s5;;7G17L%%*[[Mcii[ I ;;-q*
 	
 {{}Q((HF KL	M
 :D
47''doo-&*oo&;&;G&DO"&**_"=K#$L  # #e$#    #O (C,>!,C"'":3t# 
D	 	$$UC$O /==\Bs>==\B)9)?)?&Iuh   05499X;N3NO iiU399=5@@k

 /3zzSYY/FF+DI~~"yq'9R'?'l1$);r)A'o[4!hhHHeU^399= "ook;G9((388UEN#))#LMK	$)  $7'%9')	F ==l;;
 svv./	66//#*9w 0 	 	 @E 0 	 	2 E
 	
 ;;LJAq!Q#(=S388A;eE JAq!Q	QAKK1aA/E t$Dt$D#1J
 UD$	5IG;;w--G Z8 	KL	F  	$ $O	$t  	$#O	$   	vv33'#( 4 	 	 	sO   A'R) S &)S +(S, S, )SSSSS)(S),2T" T")g      ?)r  )g?)r   )T)r^   )ru   T)Nr   )Nr   Nr   )ru   F)r]   r   Nr]   )r]   r   NNr]   )r^   r   F)Fr^   )F)FF)NNgMbP?)r   )TN)d   r]   N)r  r  r]   Tr   )NNr]   r]   )NNNFNN)Lr  r   r   jax.experimental.sparseexperimentalr   r   	jax.numpynumpyr   r   r   r   0jax.experimental.pallas.ops.tpu.splash_attentionr   r   	keras.srcr   &keras.src.backend.common.backend_utilsr   keras.src.backend.jax.corer	   r
   r   r   r   r   r   r   r    r#   r,   r.   r1   r4   r6   r9   r;   r=   rA   rD   rF   rI   rN   rQ   rT   r+   rX   rZ   rs   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  rL  rY  r  r  r  r  r  r  r  r  r|  r   r   <module>r%     s     
 , ,     , 8


!






"

<


#

$
$
!

5
6
%
)
*  #	, #R I0 %&T  8 1n *b > -`?8.2428,D ?C!*cR 	(\ `L  
F
5p*72 $V 
	
q.r   