
"""Mid level API for TPU Embeddings."""

import functools
from typing import Any, Callable, Dict, Iterable, List, Optional, Text, Tuple, Union

from absl import logging

from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.protobuf.tpu import tpu_embedding_configuration_pb2
from tensorflow.python.distribute import device_util
from tensorflow.python.distribute import distribute_lib
from tensorflow.python.distribute import distribute_utils
from tensorflow.python.distribute import sharded_variable
from tensorflow.python.distribute import tpu_strategy
from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import device as tf_device
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor as tensor_lib
from tensorflow.python.framework.tensor_shape import TensorShape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables as tf_variables
from tensorflow.python.ops.ragged import ragged_tensor
from tensorflow.python.saved_model import registration
from tensorflow.python.saved_model import save_context
from tensorflow.python.tpu import tpu
from tensorflow.python.tpu import tpu_embedding_v2_utils
from tensorflow.python.tpu import tpu_replication
from tensorflow.python.tpu.ops import tpu_ops
from tensorflow.python.trackable import autotrackable
from tensorflow.python.trackable import base
from tensorflow.python.types import internal as internal_types
from tensorflow.python.util import compat
from tensorflow.python.util import nest
from tensorflow.python.util import tf_inspect
from tensorflow.python.util.tf_export import tf_export

_HOOK_KEY = "TPUEmbedding_saveable"
_NAME_KEY = "_tpu_embedding_layer"


class TPUEmbeddingVariable(sharded_variable.ShardedVariableMixin):
  """A ShardedVariable class for TPU."""

  @property
  def _in_graph_mode(self):
    return self.variables[0]._in_graph_mode  # pylint: disable=protected-access


def _add_key_attr(op, name):
  op._set_attr(  # pylint: disable=protected-access
      _NAME_KEY, attr_value_pb2.AttrValue(s=compat.as_bytes(name)))


@tf_export("tpu.experimental.embedding.TPUEmbedding")
class TPUEmbedding(autotrackable.AutoTrackable):
  """The TPUEmbedding mid level API.

  NOTE: When instantiated under a TPUStrategy, this class can only be created
  once per call to `tf.tpu.experimental.initialize_tpu_system`. If you wish to
  re-initialize the embedding engine you must re-initialize the TPU as well.
  Doing this will clear any variables from TPU, so ensure you have checkpointed
  before you do this. If further instances of the class are needed,
  set the `initialize_tpu_embedding` argument to `False`.

  This class can be used to support training large embeddings on TPU. When
  creating an instance of this class, you must specify the complete set of
  tables and features you expect to lookup in those tables. See the
  documentation of `tf.tpu.experimental.embedding.TableConfig` and
  `tf.tpu.experimental.embedding.FeatureConfig` for more details on the complete
  set of options. We will cover the basic usage here.

  NOTE: multiple `FeatureConfig` objects can use the same `TableConfig` object,
  allowing different features to share the same table:

  ```python
  table_config_one = tf.tpu.experimental.embedding.TableConfig(
      vocabulary_size=...,
      dim=...)
  table_config_two = tf.tpu.experimental.embedding.TableConfig(
      vocabulary_size=...,
      dim=...)
  feature_config = {
      'feature_one': tf.tpu.experimental.embedding.FeatureConfig(
          table=table_config_one),
      'feature_two': tf.tpu.experimental.embedding.FeatureConfig(
          table=table_config_one),
      'feature_three': tf.tpu.experimental.embedding.FeatureConfig(
          table=table_config_two)}
  ```

  There are two modes under which the `TPUEmbedding` class can be used. Which
  mode applies depends on whether the class was created under a `TPUStrategy`
  scope or not.

  Under `TPUStrategy`, we allow access to the methods `enqueue`, `dequeue` and
  `apply_gradients`. We will show examples below of how to use these to train
  and evaluate your model. On CPU, we only allow access to the
  `embedding_tables` property, which gives access to the embedding tables so
  that you can use them to run model evaluation/prediction on CPU.

  First let's look at the `TPUStrategy` mode. Initial setup looks like:

  ```python
  strategy = tf.distribute.TPUStrategy(...)
  with strategy.scope():
    embedding = tf.tpu.experimental.embedding.TPUEmbedding(
        feature_config=feature_config,
        optimizer=tf.tpu.experimental.embedding.SGD(0.1))
  ```

  When creating a distributed dataset that is to be passed to the enqueue
  operation a special input option must be specified:

  ```python
  distributed_dataset = (
      strategy.distribute_datasets_from_function(
          dataset_fn=...,
          options=tf.distribute.InputOptions(
              experimental_fetch_to_device=False))
  dataset_iterator = iter(distributed_dataset)
  ```

  Different feature inputs can have different shapes. For dense and sparse
  tensors, rank 2 and above is supported. For ragged tensors, although only
  rank 2 is supported, you can specify the output shape to be rank 2 and above.
  The output shape specified in the `FeatureConfig` has the highest priority,
  the input shapes passed to the `build` method have second priority, and the
  input shapes auto detected from the input features have the lowest priority.
  The latter two are converted to output shapes by omitting the last dimension.
  If a lower priority source produces output shapes that don't match the higher
  priority one, a `ValueError` is raised; a lower priority source can only
  override output shapes that the higher priority one leaves undefined.

  NOTE: All batches passed to the layer can have different input shapes, but
  these input shapes need to match the output shapes set by either the
  `FeatureConfig` or the build method, except for ragged tensors. A 2D ragged
  tensor with its output shape set to higher dimensions is allowed as long as
  the total number of elements matches. All subsequent calls must have the same
  input shapes. In the event that the input shapes cannot be automatically
  determined by the enqueue method, you must call the build method with the
  input shapes or provide output shapes in the `FeatureConfig` to initialize
  the layer.
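
  For example, a sketch of pinning an output shape in the `FeatureConfig`
  (reusing `table_config_one` from the example above; the `16` per replica
  batch size is an assumption for illustration):

  ```python
  feature_config = {
      'feature_one': tf.tpu.experimental.embedding.FeatureConfig(
          table=table_config_one,
          output_shape=[16])}
  ```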

  To use this API on TPU you should use a custom training loop. Below is an
  example of a training and evaluation step:

  ```python
  @tf.function
  def training_step(dataset_iterator, num_steps):
    def tpu_step(tpu_features):
      with tf.GradientTape() as tape:
        activations = embedding.dequeue()
        tape.watch(activations)
        model_output = model(activations)
        loss = ...  # some function of labels and model_output

      embedding_gradients = tape.gradient(loss, activations)
      embedding.apply_gradients(embedding_gradients)
      # Insert your model gradient and optimizer application here

    for _ in tf.range(num_steps):
      embedding_features, tpu_features = next(dataset_iterator)
      embedding.enqueue(embedding_features, training=True)
      strategy.run(tpu_step, args=(tpu_features, ))

  @tf.function
  def evaluation_step(dataset_iterator, num_steps):
    def tpu_step(tpu_features):
      activations = embedding.dequeue()
      model_output = model(activations)
      # Insert your evaluation code here.

    for _ in tf.range(num_steps):
      embedding_features, tpu_features = next(dataset_iterator)
      embedding.enqueue(embedding_features, training=False)
      strategy.run(tpu_step, args=(tpu_features, ))
  ```

  NOTE: The calls to `enqueue` have `training` set to `True` when
  `embedding.apply_gradients` is used and set to `False` when
  `embedding.apply_gradients` is not present in the function. If you don't
  follow this pattern you may cause an error to be raised or the TPU may
  deadlock.

  In the above examples, we assume that the user has a dataset which returns
  a tuple where the first element of the tuple matches the structure of what
  was passed as the `feature_config` argument to the object initializer. We
  also use `tf.range` to get a `tf.while_loop`, which improves performance.

  When checkpointing your model, you should include your
  `tf.tpu.experimental.embedding.TPUEmbedding` object in the checkpoint. It is a
  trackable object and saving it will save the embedding tables and their
  optimizer slot variables:

  ```python
  checkpoint = tf.train.Checkpoint(model=model, embedding=embedding)
  checkpoint.save(...)
  ```

  On CPU, only the `embedding_table` property is usable. This will allow you to
  restore a checkpoint to the object and have access to the table variables:

  ```python
  model = model_fn(...)
  embedding = tf.tpu.experimental.embedding.TPUEmbedding(
      feature_config=feature_config,
      optimizer=tf.tpu.experimental.embedding.SGD(0.1))
  checkpoint = tf.train.Checkpoint(model=model, embedding=embedding)
  checkpoint.restore(...)

  tables = embedding.embedding_tables
  ```

  You can now use the tables in functions like `tf.nn.embedding_lookup` to
  perform your embedding lookup and pass the results to your model.
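
  For example, a minimal sketch of such a CPU lookup (assuming `feature_ids`
  is an int32 `tf.Tensor` of ids for `'feature_one'` and `feature_config` is
  the dict from the example above):

  ```python
  table = tables[feature_config['feature_one'].table]
  activations = tf.nn.embedding_lookup(table, feature_ids)
  ```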

  """

  def __init__(
      self,
      feature_config: Union[tpu_embedding_v2_utils.FeatureConfig, Iterable],  # pylint:disable=g-bare-generic
      optimizer: Optional[tpu_embedding_v2_utils._Optimizer],  # pylint:disable=protected-access
      pipeline_execution_with_tensor_core: bool = False,
  ):
    """Creates the TPUEmbedding mid level API object.

    ```python
    strategy = tf.distribute.TPUStrategy(...)
    with strategy.scope():
      embedding = tf.tpu.experimental.embedding.TPUEmbedding(
          feature_config=tf.tpu.experimental.embedding.FeatureConfig(
              table=tf.tpu.experimental.embedding.TableConfig(
                  dim=...,
                  vocabulary_size=...)))
    ```

    Args:
      feature_config: A nested structure of
        `tf.tpu.experimental.embedding.FeatureConfig` configs.
      optimizer: An instance of one of `tf.tpu.experimental.embedding.SGD`,
        `tf.tpu.experimental.embedding.Adagrad` or
        `tf.tpu.experimental.embedding.Adam`. When not created under
        TPUStrategy may be set to None to avoid the creation of the optimizer
        slot variables, useful for optimizing memory consumption when exporting
        the model for serving where slot variables aren't needed.
      pipeline_execution_with_tensor_core: If True, the TPU embedding
        computations will overlap with the TensorCore computations (and hence
        will be one step old). Set to True for improved performance.

    Raises:
      ValueError: If optimizer is not one of tf.tpu.experimental.embedding.(SGD,
      Adam or Adagrad) or None when created under a TPUStrategy.
    """
    self._strategy = distribute_lib.get_strategy()
    self._using_tpu = isinstance(
        self._strategy, (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV2))
    self._pipeline_execution_with_tensor_core = (
        pipeline_execution_with_tensor_core)
    self._feature_config = feature_config
    self._output_shapes = []
    for feature in nest.flatten(feature_config):
      self._output_shapes.append(feature.output_shape)

    device_assignment = getattr(
        self._strategy.extended, "_device_assignment", None)
    self._num_cores_per_replica = (
        device_assignment.num_cores_per_replica if device_assignment else None)

    # Process the feature configs to extract the tables in a fixed order.
    self._table_config = []
    for feature in nest.flatten(feature_config):
      if feature.table not in self._table_config:
        self._table_config.append(feature.table)

    table_names = []
    for i, table in enumerate(self._table_config):
      if table.optimizer is None:
        table.optimizer = optimizer
      if (table.optimizer is not None and not isinstance(
          table.optimizer,
          tpu_embedding_v2_utils._Optimizer)):  # pylint: disable=protected-access
        raise ValueError("{} is an unsupported optimizer class. Please pass an "
                         "instance of one of the optimizer classes under "
                         "tf.tpu.experimental.embedding.".format(
                             type(table.optimizer)))
      if table.name is None:
        table.name = "table_{}".format(i)
      if table.name in table_names:
        raise ValueError("Tables must have a unique name. "
                         f"Multiple tables with name {table.name} found.")
      table_names.append(table.name)

    if self._using_tpu:
      # Extract a list of callable learning rates in a fixed order. Each table
      # in the config proto gets an index into this list, and the list is
      # passed, in the same order, to the send_tpu_embedding_gradients op
      # after evaluation.
      self._dynamic_learning_rates = []
      for table in self._table_config:
        if (callable(table.optimizer.learning_rate) and
            table.optimizer.learning_rate not in self._dynamic_learning_rates):
          self._dynamic_learning_rates.append(table.optimizer.learning_rate)

      # We need a list of host devices for the load/retrieve operations.
      self._hosts = tpu_embedding_v2_utils.get_list_of_hosts(self._strategy)

    self._built = False
    self._verify_output_shapes_on_enqueue = True

    This method creates the underlying variables (including slot variables). If
    created under a TPUStrategy, this will also initialize the TPU for
    embeddings.

    This function will automatically get called by enqueue, which will try to
    determine your output shapes. If this fails, you must manually
    call this method before you call enqueue.

    Args:
      per_replica_input_shapes: A nested structure of The per replica input
        shapes that matches the structure of the feature config. The input
        shapes should be the same as the input shape of the feature (except for
        ragged tensor) Note that it is fixed and the same per replica input
        shapes must be used for both training and evaluation. If you want to
        calculate this from the global input shapes, you can use
        `num_replicas_in_sync` property of your strategy object. May be set to
        None if not created under a TPUStrategy.
      per_replica_batch_size: (Deprecated) The per replica batch size that you
        intend to use. Note that is fixed and the same batch size must be used
        for both training and evaluation. If you want to calculate this from the
        global batch size, you can use `num_replicas_in_sync` property of your
        strategy object. May be set to None if not created under a TPUStrategy.

    Raises:
      ValueError: If per_replica_input_shapes is inconsistent with the output
      shapes stored in the feature config or the output shapes get from the
      input shapes are not fully defined.
      RuntimeError: If tpu embedding is already initialized on TPU.
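
    For example, a sketch of deriving the per replica input shapes from a
    global batch size (the feature names are assumptions carried over from
    the class level example):

    ```python
    per_replica_batch_size = global_batch_size // strategy.num_replicas_in_sync
    embedding.build(
        per_replica_input_shapes={
            'feature_one': tf.TensorShape([per_replica_batch_size, 1]),
            'feature_two': tf.TensorShape([per_replica_batch_size, 1]),
            'feature_three': tf.TensorShape([per_replica_batch_size, 1])})
    ```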
    """
    if self._built:
      return

    if self._using_tpu:
      # If the tpu embedding is already initialized on TPU, raise an error.
      if tpu_ops.is_tpu_embedding_initialized():
        raise RuntimeError(
            "TPU is already initialized for embeddings. This may be caused by "
            "using multiple TPUEmbedding instances in a TPU scope which is "
            "unsupported")
      self._get_and_update_output_shapes_from_input(per_replica_input_shapes,
                                                    per_replica_batch_size)

      self._config_proto = self._create_config_proto()

      logging.info("Initializing TPU Embedding engine.")
      tpu_embedding_v2_utils.log_tpu_embedding_configuration(
          self._config_proto)

      @def_function.function
      def load_config():
        tpu.initialize_system_for_tpu_embedding(self._config_proto)

      load_config()
      logging.info("Done initializing TPU Embedding engine.")

    # Create and load variables and slot variables into the TPU. Note that
    # this is a dict of dicts. Keys to the first dict are table names. We
    # would prefer to use TableConfigs, but then these variables won't be
    # properly tracked by the tracking API.
    self._variables = self._create_variables_and_slots()

    self._built = True

    # This is internally conditioned on self._built and self._using_tpu.
    self._load_variables()

  def _maybe_build(self, output_shapes: Optional[List[TensorShape]] = None):
    if not self._built:
      # This can be called while tracing a function, so we wrap the
      # initialization code with init_scope so it runs eagerly. This means it
      # will not be included in the function graph generated by tracing, so
      # we can be sure that we only initialize the TPU for embeddings once.
      with ops.init_scope():
        self.build(output_shapes)

  def _get_and_update_output_shapes_from_input(
      self,
      per_replica_input_shapes: Optional[List[TensorShape]] = None,
      per_replica_batch_size: Optional[int] = None):
    """Get and update the per replica output shapes from the input."""
    per_replica_output_shapes = None
    if per_replica_batch_size and per_replica_input_shapes is None:
      logging.warning(
          "per_replica_batch_size argument will be deprecated, please specify "
          "all the input shapes using per_replica_input_shapes argument.")
      per_replica_output_shapes = self._get_output_shapes_from_batch_size(
          per_replica_batch_size)

    if per_replica_input_shapes is not None:
      if isinstance(per_replica_input_shapes, int):
        logging.warning(
            "Passing batch size to per_replica_input_shapes argument will be "
            "deprecated, please specify all the input shapes using "
            "per_replica_input_shapes argument.")
        per_replica_output_shapes = self._get_output_shapes_from_batch_size(
            per_replica_input_shapes)
      else:
        nest.assert_same_structure(
            nest.flatten(per_replica_input_shapes),
            nest.flatten(self._feature_config))

        # Convert the nested structure to a flattened list.
        per_replica_input_shapes = nest.flatten(per_replica_input_shapes)

        per_replica_output_shapes = self._get_output_shapes_from_input_shapes(
            per_replica_input_shapes)

    if per_replica_output_shapes is not None:
      # Update the stored output shapes based on the per replica output shapes.
      self._check_output_shapes(per_replica_output_shapes)
      self._update_output_shapes(per_replica_output_shapes)

    self._check_output_shapes_fully_defined()

  def _get_output_shapes_from_input_shapes(
      self, input_shapes: List[TensorShape]) -> List[TensorShape]:
    """Get output shapes from the flattened input shapes list."""
    output_shapes = []
    for input_shape, feature in zip(input_shapes,
                                    nest.flatten(self._feature_config)):
      if input_shape.rank is None or input_shape.rank < 1:
        raise ValueError(
            "Received input tensor of shape {}. Rank must be 1 and above"
            .format(input_shape))
      # Update the input shape with the max sequence length. Only do this
      # when the feature is a 2D ragged or sparse tensor, the output shape is
      # not set in the feature config and max_sequence_length is not 0.
      if (len(input_shape) == 2 and input_shape[-1] != 1 and
          not feature.output_shape and feature.max_sequence_length > 0):
        input_shape_list = input_shape.as_list()
        input_shape_list.insert(
            len(input_shape_list) - 1, feature.max_sequence_length)
        input_shape = TensorShape(input_shape_list)
      if input_shape.rank == 1:
        output_shapes.append(input_shape)
      else:
        output_shapes.append(input_shape[:-1])
    return output_shapes

  @property
  def embedding_tables(
      self,
  ) -> Dict[tpu_embedding_v2_utils.TableConfig, tf_variables.Variable]:
    """Returns a dict of embedding tables, keyed by `TableConfig`.

    This property only works when the `TPUEmbedding` object is created under a
    non-TPU strategy. It is intended to be used for CPU based lookup when
    creating a serving checkpoint.

    Returns:
      A dict of embedding tables, keyed by `TableConfig`.

    Raises:
      RuntimeError: If object was created under a `TPUStrategy`.
    """
    if self._using_tpu:
      if save_context.in_save_context():
        return {
            table: self._variables[table.name]["parameters"].variables[0]
            for table in self._table_config
        }
      raise RuntimeError(
          "Unable to retrieve embedding tables when using a TPU strategy. If "
          "you need access, save your model, create this object under a CPU "
          "strategy and restore.")

    self._maybe_build(None)

    # Only return the tables and not the slot variables.
    return {
        table: self._variables[table.name]["parameters"]
        for table in self._table_config
    }

  def _create_config_proto(
      self,
  ) -> tpu_embedding_configuration_pb2.TPUEmbeddingConfiguration:
    """Creates the TPUEmbeddingConfiguration proto.

    This proto is used to initialize the TPU embedding engine.

    Returns:
      A TPUEmbeddingConfiguration proto.
    """
    config_proto = tpu_embedding_configuration_pb2.TPUEmbeddingConfiguration()

    # Map each callable dynamic learning rate to its index in the list.
    learning_rate_index = {
        r: i for i, r in enumerate(self._dynamic_learning_rates)
    }

    for table in self._table_config:
      table._set_table_descriptor(  # pylint: disable=protected-access
          config_proto.table_descriptor.add(),
          self._strategy.extended.num_hosts,
          learning_rate_index)

    table_to_id = {table: i for i, table in enumerate(self._table_config)}

    # Set the feature descriptor fields in the config proto.
    for feature, output_shape in zip(
        nest.flatten(self._feature_config), self._output_shapes):
      feature_descriptor = config_proto.feature_descriptor.add()

      if feature.name:
        feature_descriptor.name = feature.name

      feature_descriptor.table_id = table_to_id[feature.table]
      # The input shape of the feature is the actual shape of the input
      # tensor except the last dimension, because the last dimension will
      # always be reduced.
      feature_descriptor.input_shape.extend(output_shape.as_list())

    # Always set mode to training; we override the mode during enqueue.
    config_proto.mode = (
        tpu_embedding_configuration_pb2.TPUEmbeddingConfiguration.TRAINING)

    num_replica = self._strategy.num_replicas_in_sync
    num_cores_per_replica = self._num_cores_per_replica or 1

    config_proto.num_hosts = self._strategy.extended.num_hosts
    config_proto.num_tensor_cores = num_replica * num_cores_per_replica

    config_proto.sharding_strategy = (
        tpu_embedding_configuration_pb2.TPUEmbeddingConfiguration.DIV_DEFAULT)
    config_proto.pipeline_execution_with_tensor_core = (
        self._pipeline_execution_with_tensor_core)
    if self._num_cores_per_replica:
      config_proto.spmd_sharding.enabled = True
      config_proto.spmd_sharding.num_cores_per_replica = (
          self._num_cores_per_replica)

    return config_proto

  def apply_gradients(self, gradients, name: Optional[Text] = None):
    """Applies the gradient update to the embedding tables.

    If a gradient of `None` is passed in any position of the nested structure,
    then a gradient update with a zero gradient is applied for that feature.
    For optimizers like SGD or Adagrad, this is the same as applying no update
    at all. For lazy Adam and other sparsely applied optimizers with decay,
    ensure you understand the effect of applying a zero gradient.
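
    For example, a sketch of skipping the update for one feature by passing
    `None` in its position (`'feature_three'` is an assumed feature name from
    the class level example):

    ```python
    embedding_gradients = tape.gradient(loss, activations)
    # Apply a zero gradient update to 'feature_three' only.
    embedding_gradients['feature_three'] = None
    embedding.apply_gradients(embedding_gradients)
    ```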

    ```python
    strategy = tf.distribute.TPUStrategy(...)
    with strategy.scope():
      embedding = tf.tpu.experimental.embedding.TPUEmbedding(...)

    distributed_dataset = (
        strategy.distribute_datasets_from_function(
            dataset_fn=...,
            options=tf.distribute.InputOptions(
                experimental_fetch_to_device=False))
    dataset_iterator = iter(distributed_dataset)

    @tf.function
    def training_step():
      def tpu_step(tpu_features):
        with tf.GradientTape() as tape:
          activations = embedding.dequeue()
          tape.watch(activations)

          loss = ... #  some computation involving activations

        embedding_gradients = tape.gradient(loss, activations)
        embedding.apply_gradients(embedding_gradients)

      embedding_features, tpu_features = next(dataset_iterator)
      embedding.enqueue(embedding_features, training=True)
      strategy.run(tpu_step, args=(tpu_features, ))

    training_step()
    ```

    Args:
      gradients: A nested structure of gradients, with structure matching the
        `feature_config` passed to this object.
      name: A name for the underlying op.

    Raises:
      RuntimeError: If called when object wasn't created under a `TPUStrategy`
        or if not built (either by manually calling build or calling enqueue).
      ValueError: If a non-`tf.Tensor` non-`None` gradient is passed in, or a
        `tf.Tensor` of the incorrect shape is passed in. Also if
        the size of any sequence in `gradients` does not match corresponding
        sequence in `feature_config`.
      TypeError: If the type of any sequence in `gradients` does not match
        corresponding sequence in `feature_config`.
    """
    if not self._using_tpu:
      raise RuntimeError("apply_gradients is not valid when TPUEmbedding "
                         "object is not created under a TPUStrategy.")
    if not self._built:
      raise RuntimeError("apply_gradients called on unbuilt TPUEmbedding "
                         "object. Please either call enqueue first or "
                         "manually call the build method.")

    num_cores_per_replica = self._num_cores_per_replica or 1
    nest.assert_same_structure(self._feature_config, gradients)
    updated_gradients = []
    for (path, gradient), feature, output_shape in zip(
        nest.flatten_with_joined_string_paths(gradients),
        nest.flatten(self._feature_config),
        self._output_shapes):
      full_output_shape = [
          x // num_cores_per_replica for x in output_shape
      ] + [feature.table.dim]
      if gradient is not None and not isinstance(gradient, tensor_lib.Tensor):
        raise ValueError(
            f"found non-tensor type: {type(gradient)} at path {path}.")
      if gradient is not None:
        if gradient.shape != full_output_shape:
          raise ValueError("Found gradient of shape {} at path {}. Expected "
                           "shape {}.".format(gradient.shape, path,
                                              full_output_shape))
      else:
        # No gradient for this feature. Since we must pass a gradient for
        # every feature, send a zero gradient here. Note that this is not
        # correct for all optimizers.
        logging.warning(
            "No gradient passed for feature %s, sending zero gradient. This "
            "may not be correct behavior for certain optimizers like Adam.",
            path)
        gradient = array_ops.zeros(full_output_shape, dtype=dtypes.float32)
      # Some gradients can be passed in with an op whose shape is not
      # correctly set. This ensures the shape of the gradient is correct.
      updated_gradients.append(
          array_ops.reshape(gradient, shape=full_output_shape))
    op = tpu_ops.send_tpu_embedding_gradients(
        inputs=updated_gradients,
        learning_rates=[
            math_ops.cast(fn(), dtype=dtypes.float32)
            for fn in self._dynamic_learning_rates
        ],
        config=self._config_proto.SerializeToString())

    # Apply the name tag to the op.
    if name is not None:
      _add_key_attr(op, name)

  def dequeue(self, name: Optional[Text] = None):
    """Get the embedding results.
    Returns a nested structure of `tf.Tensor` objects, matching the structure of
    the `feature_config` argument to the `TPUEmbedding` class. The output shape
    of the tensors is `(*output_shape, dim)`, `dim` is the dimension of the
    corresponding `TableConfig`. For output_shape, there are three places where
    it can be set.
      1. FeatureConfig provided in the __init__ function.
      2. Per_replica_output_shapes by directly calling the build method
           after initializing the tpu embedding class.
      3. Auto detected from the shapes of the input feature.
    The priority of these places is the exact same order.

    ```python
    strategy = tf.distribute.TPUStrategy(...)
    with strategy.scope():
      embedding = tf.tpu.experimental.embedding.TPUEmbedding(...)

    distributed_dataset = (
        strategy.distribute_datasets_from_function(
            dataset_fn=...,
            options=tf.distribute.InputOptions(
                experimental_fetch_to_device=False))
    dataset_iterator = iter(distributed_dataset)

    @tf.function
    def training_step():
      def tpu_step(tpu_features):
        with tf.GradientTape() as tape:
          activations = embedding.dequeue()
          tape.watch(activations)

          loss = ... #  some computation involving activations

        embedding_gradients = tape.gradient(loss, activations)
        embedding.apply_gradients(embedding_gradients)

      embedding_features, tpu_features = next(dataset_iterator)
      embedding.enqueue(embedding_features, training=True)
      strategy.run(tpu_step, args=(tpu_features, ))

    training_step()
    ```

    Args:
      name: A name for the underlying op.

    Returns:
      A nested structure of tensors, with the same structure as `feature_config`
    passed to this instance of the `TPUEmbedding` object.

    Raises:
      RuntimeError: If called when object wasn't created under a `TPUStrategy`
        or if not built (either by manually calling build or calling enqueue).
    """
    if not self._using_tpu:
      raise RuntimeError("dequeue is not valid when TPUEmbedding object is "
                         "not created under a TPUStrategy.")
    if not self._built:
      raise RuntimeError("dequeue called on unbuilt TPUEmbedding object. "
                         "Please either call enqueue first or manually call "
                         "the build method.")

    # The activations returned by this op are per feature.
    activations = tpu_ops.recv_tpu_embedding_activations(
        num_outputs=len(self._config_proto.feature_descriptor),
        config=self._config_proto.SerializeToString())

    # Apply the name tag to the op.
    if name is not None:
      _add_key_attr(activations[0].op, name)

    # Pack the list back into the same nested structure as the features.
    return nest.pack_sequence_as(self._feature_config, activations)

  def _create_variables_and_slots(
      self,
  ) -> Dict[Text, Dict[Text, tf_variables.Variable]]:
    """Create variables for TPU embeddings.
    Note under TPUStrategy this will ensure that all creations happen within a
    variable creation scope of the sharded variable creator.

    Returns:
      A dict of dicts. The outer dict is keyed by the table names and the inner
      dicts are keyed by 'parameters' and the slot variable names.
    c                      j                    j                  ffddfd	  j                   j                  j                         } fd} j
                   j
                  j                  ||      }ni }||d<   |S )zCreate all variables.c                 j    ~t        j                  ||      }t        j                  | |||      S )Nr   )rH   initial_valuer   r   	trainable)	functoolspartialtf_variablesVariable)rH   r   r   initializerr   r   variable_shapes         r9   getterzRTPUEmbedding._create_variables_and_slots.<locals>.create_variables.<locals>.getter2  sA     "))+~057$$' ! 	!r:   c                 N    j                  | |t        j                  |      S )N)rH   r   r   r   r   r   ) _add_variable_with_custom_getterr   r   )rH   r   r   r   r8   r   s      r9   variable_creatorz\TPUEmbedding._create_variables_and_slots.<locals>.create_variables.<locals>.variable_creator?  s3     44# .. 5 ! 	!r:   )r   c                 8     j                   dz   | z   |d      S )N/FrH   )rH   r   rb   r   s     r9   slot_creatorzXTPUEmbedding._create_variables_and_slots.<locals>.create_variables.<locals>.slot_creatorN  s&    

S 04 7 + %' 	'r:   r   )T)vocabulary_sizer   rH   r   rV   rM   _create_slots)rb   r   r   	slot_varsr   r   r   r8   s   `   @@@r9   create_variableszBTPUEmbedding._create_variables_and_slots.<locals>.create_variables.  s    --uyy9n!
! $EJJ0A0A26//.ACj'
 
	$OO11*lK		 *ir:   N)ra   rV   rH   r    variable_creator_scopemake_sharded_variable_creatorrl   )r8   r   r!   rb   s   `   r9   r   z(TPUEmbedding._create_variables_and_slots!  s    *\ I## :__ 0 7	%**22)$++68 	:"25"9)EJJ
	: 	:	: 		: 	:s   "BB	c                    | j                   r{| j                  rnt        j                         st	        j
                         sEt        | j                  j                         | j                  | j                  | j                         y y y y rv   )rV   rm   r   executing_eagerlyr$   r   _load_variables_implrx   r   rl   r   ra   r7   s    r9   r   zTPUEmbedding._load_variablesg  se    
 4;;%%'L,H,H,J4--??A;;??--/ -K 'r:   c                    | j                   r{| j                  rnt        j                         st	        j
                         sEt        | j                  j                         | j                  | j                  | j                         y y y y rv   )rV   rm   r   r   r$   r   _retrieve_variables_implrx   r   rl   r   ra   r7   s    r9   _retrieve_variablesz TPUEmbedding._retrieve_variabless  se    
 4;;%%'L,H,H,Jt11CCE#{{##113 -K 'r:   c	                    |t        dj                  |            |j                  |       |j                  t        j                  t        j                  |dg      t        j                               |j                  |       y )Nz`Weight specified for dense input {}, which is not allowed. Weight will always be 1 in this case.r   )	re   rf   r[   r   r   r   r   r   int64)	r8   r   weightindicesvaluesweights	int_zerosfloat_zerosr   s	            r9   _add_data_for_tensorz!TPUEmbedding._add_data_for_tensor  si    228&,@ @ NN9
MM(--	 1 1&2$ ?NONN;r:   c
                 0   t        j                  |j                  t        j                        }
|j
                  j                  dk(  r9|	j                  sf|	j                  dkD  rWt        j                  |
ddgddgg      }
n9|	j                  dkD  r*t        j                  d|j
                  j                         |j                  |
       |j                  t        j                  |j                  t        j                               ||t!        |t"        j$                        s$t'        dj)                  |t+        |                  |j                  t        j                  |j                  t        j,                               y |j                  |       y )Nr   r   r   )paddingszZInput tensor is rank %d which is above 2, the max_sequence_length setting will be ignored.zOWeight for {} is type {} which does not match type input which is SparseTensor.)r   r   r   r   int32r   r   r\   r   r   padr   r   r[   r   r   rS   r   SparseTensorre   rf   rg   r   )r8   r   r   r   r   r  r  r  r   ro   sample_indicess              r9   _add_data_for_sparse_tensorz(TPUEmbedding._add_data_for_sparse_tensor  s*    ]]6>>6<<@N||A!!g&A&AA&E"q!fq!f%57 
	$	$q	(@ LL	
 NN>"
MM(--v||<= : :; ==CV!4<>12 	2 nnX]]6==&..ABnn[!r:   c
                    |j                  t        j                  |j                  t        j
                               |j                  t        j                  |j                  t        j                               ||t        |t        j                        s$t        dj                  |t        |                  |j                  t        j                  |j                  t        j                               y |j                  |       y )NzOWeight for {} is type {} which does not match type input which is RaggedTensor.)r[   r   r   
row_splitsr   r  r   r   rS   r"   RaggedTensorre   rf   rg   r   )
r8   r   r   r  r   r  r  r  r   ro   s
             r9   _add_data_for_ragged_tensorz(TPUEmbedding._add_data_for_ragged_tensor  s     hmmF$5$5v||DE
MM(--v||<= : :; ==CV!4<>12 	2 nnX]]6==&..ABnn[!r:   flat_inputsflat_weightsflat_featuresdevice_ordinalmode_overridec                    | j                   D cg c]  }|j                   }}g }g }	g }
t        j                  dt        j
                        }t        j                  dt        j                        }t        |||      D ]  \  }}\  }}t        |t        j                        r| j                  ||||	|
|||       =t        |t        j                        r| j                  ||||	|
||||	       qt        |t        j                         r| j#                  ||||	|
||||	       t%        dj'                  |t)        |                   t+        j,                  ||	|
|||      S c c}w )a  Outputs a the enqueue op given the inputs and weights.

    Args:
      flat_inputs: A list of input tensors.
      flat_weights: A list of input weights (or None) of the same length as
        flat_inputs.
      flat_features: A list of FeatureConfigs of the same length as flat_inputs.
      device_ordinal: The device to create the enqueue op for.
      mode_override: A tensor containing the string "train" or "inference".

    Returns:
      The enqueue op.
    )r   r   zjInput {} is of unknown type {}. Please only pass Tensor, SparseTensor or RaggedTensor as input to enqueue.)sample_indices_or_row_splitsembedding_indicesaggregation_weightsr  r  	combiners)ra   combinerr   r   r   r  r   r   rS   r   r   r  r   r	  r  r"   r  r  re   rf   rg   r(   ,enqueue_tpu_embedding_arbitrary_tensor_batch)r8   r  r  r  r  r  rb   r  indices_or_row_splitsr   r  r  r  inpr   r   ro   s                    r9   _generate_enqueue_opz!TPUEmbedding._generate_enqueue_op  sr   , .2-?-?@E@I@ FG FLL9I//$fnn=K ),\=)2 =$V_dG	C**	+!!#v/Df")9k4	Ic=556((f6K)/))4dG	E c=556((f6K)/))4dG	E  $$*F4c$;= 	==$ ??%: ##% M As   E&c                 d   t        j                         }d}|Q|j                         }|,t        |t        j
                        rd}n|j                  }|,|rnt        |dd      }|Q|t        j                         k7  r/|r-t        dj                  t        j                         |            |S )z9Raises an error if we are not in the TPUReplicateContext.FNTouter_graphaF  Current graph {} does not match graph which contains TPUReplicateContext {}. This is most likely due to the fact that enqueueing embedding data is called inside control flow or a tf.function inside `strategy.run`. This is not supported because outside compilation fails to extract the enqueue ops as the head of a computation.)
r   get_default_graph_get_control_flow_contextrS   r'   TPUReplicateContextouter_contextr]   r{   rf   )r8   graph
in_tpu_ctxctxs       r9   /_raise_error_for_incorrect_control_flow_contextz<TPUEmbedding._raise_error_for_incorrect_control_flow_context  s    
 !!#EJ

++-cOc?>>?*
	 O
 
e]D1e 
 %%''J
 "6#"7"7"95AC C r:   c                 \   t        j                  |d      D ]  \  }}|j                  j                  dk(  r 	 |j                  j	                  d      }|r?t        dj                  ||j                  j                  |j                  j                               y# t
        $ r d}Y Vw xY w)zAChecks all tensors in features to see if they are a direct input.Texpand_compositesPlaceholder_tpu_input_identityFaM  Received input tensor {} which is the output of op {} (type {}) which does not have the `_tpu_input_identity` attr. Please ensure that the inputs to this layer are taken directly from the arguments of the function called by strategy.run. Two possible causes are: dynamic batch size support or you are using a keras layer and are not passing tensors which match the dtype of the `tf.keras.Input`s.If you are triggering dynamic batch size support, you can disable it by passing tf.distribute.RunOptions(experimental_enable_dynamic_batch_size=False) to the options argument of strategy.run().N)r-   r   rG   rg   get_attrre   rf   rH   )r8   featuresr   input_tensoris_inputs        r9   "_raise_error_for_non_direct_inputsz/TPUEmbedding._raise_error_for_non_direct_inputs  s     #CCD* Hl				.??++,AB 
* +1&1=1E1E1=1E1E+GH 	HH  s   BB+*B+c                    d }t        ||      D ]  \  }}t        j                  |      rt        j                  |d      }n|g}|D ]  }|j                  j
                  dk(  rv|j                  j                  d   j                  j
                  dk(  rF|j                  j                  d   j                  j                  D ]  } |||j                           |||j                           y)z<Checks all tensors in features to see are placed on the CPU.c                     t         j                  j                  |      }|j                  dk(  rt	        dj                  | |            y )NTPUaQ  Received input tensor {} which is on a TPU input device {}. Input tensors for TPU embeddings must be placed on the CPU. Please ensure that your dataset is prefetching tensors to the host by setting the 'experimental_fetch_to_device' option of the dataset distribution function. See the documentation of the enqueue method for an example.)	tf_device
DeviceSpecfrom_stringdevice_typere   rf   )r   device_stringspecs      r9   check_devicezETPUEmbedding._raise_error_for_inputs_not_on_cpu.<locals>.check_device:  sM    !!--m<d			U	"-
 .4VD--HJ 	J 
#r:   Tr*  Identityr   TPUReplicatedInputN)r   r-   is_nested_or_compositerZ   rG   rg   r   r   )	r8   r  
flat_pathsr<  r0  
input_pathinput_tensorstr   s	            r9   "_raise_error_for_inputs_not_on_cpuz/TPUEmbedding._raise_error_for_inputs_not_on_cpu7  s    	J %(Z$@ - j		$	$\	2\TJ% -!DDII#DDKKN""&::A))00 4fV]]34 z188
,--r:   trainingr   c           
           j                   st        d       j                         }t        j                   j
                  |        j                  s# j                  r j                  sSt        d      |du } j                  |||      } j                  |        j                   j                  |             t        j                  |      dgt              z  |5t        j                   j
                  |       t        j                  |      t        j                    j
                        t#         \  }	}
 j%                  |	       |r1 j'                  |        fd}t)        j*                  |       y|zrdnd}g } fd fd}t-         j.                  j0                        D ]C  }t3        j4                  |      }t3        j4                  |      } j6                  r$ j.                  j8                  j:                  |   }n$ j.                  j8                  j<                  |   g}t-         j6                  xs d	      D ]  }||   }t>        j@                  jC                  |      jD                  }tG        jH                  tK        jL                  |            5   jO                   |||       |||      ||
      }tQ        |       |jS                  |       ddd        F yrdnd}t>        j@                  jC                  |      }|jT                  dk7  rt        djW                  |            tG        jH                  tK        jL                  |            5   jO                  |jD                  |
      }tQ        |       ddd       y# 1 sw Y   cxY w# 1 sw Y   yxY w)a  Enqueues id tensors for embedding lookup.

    This function enqueues a structure of features to be looked up in the
    embedding tables. We expect that the input shapes of each of the tensors
    in features match the output shapes set via the FeatureConfig or the
    build method (if any); otherwise the output shapes will be auto detected
    based on the input shapes, together with the max_sequence_length or
    output shape setting in the FeatureConfig. Note that the output shapes
    are based on the per replica batch size. If your input dataset is batched
    to the global batch size and you use
    `tf.distribute.TPUStrategy`'s `experimental_distribute_dataset`, or if
    you use `distribute_datasets_from_function` and batch to the per core
    batch size computed by the context passed to your input function, the
    output shapes should match automatically.

    The output shapes are auto detected as follows (see the sketch after
    this list):
      1. For dense tensor, if rank 2 or above, make sure the tensor has last
         dimension as 1. The output shape will be the input shape excluding
         the last dimension.
      2. For sparse tensor, make sure the tensor has rank 2 and above.
           a. If feature config has max_sequence_length equals 0 or output shape
              set (the max_sequence_length setting will be ignored), the
              output shape will be the input shape excluding the last dimension.
           b. Otherwise, if the tensor is rank 2, the output shape will be input
              shape  with last dimension set as max_sequence_length. If the
              tensor is above rank 2, the output shape will be the input shape
              excluding the last dimension and the last dimension of the output
              shape will be set to max_sequence_length.
      3. For ragged tensor, make sure the tensor has rank 2.
           a. If feature config has max_sequence_length equals 0 or output shape
              set (the max_sequence_length setting will be ignored), the
              output shape will be the input shape excluding the last dimension.
           b. Otherwise, the output shape will be the input shape excluding the
              last dimension and the last dimension of the output shape will be
              set to max_sequence_length.
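
    For example, a sketch of how detection treats a sparse input (the shapes
    are assumptions for illustration):

    ```python
    # A rank 2 sparse input with per replica shape [16, 4]: 16 examples with
    # up to 4 ids each. With no output shape configured and
    # max_sequence_length == 0, the detected per replica output shape is the
    # input shape minus the last dimension: [16].
    sparse_ids = tf.sparse.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[3, 1, 4],
        dense_shape=[16, 4])
    ```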

    ```python
    strategy = tf.distribute.TPUStrategy(...)
    with strategy.scope():
      embedding = tf.tpu.experimental.embedding.TPUEmbedding(...)

    distributed_dataset = (
        strategy.distribute_datasets_from_function(
            dataset_fn=...,
            options=tf.distribute.InputOptions(
                experimental_fetch_to_device=False))
    dataset_iterator = iter(distributed_dataset)

    @tf.function
    def training_step():
      def tpu_step(tpu_features):
        with tf.GradientTape() as tape:
          activations = embedding.dequeue()
          tape.watch(activations)

          loss = ... #  some computation involving activations

        embedding_gradients = tape.gradient(loss, activations)
        embedding.apply_gradients(embedding_gradients)

      embedding_features, tpu_features = next(dataset_iterator)
      embedding.enqueue(embedding_features, training=True)
      strategy.run(tpu_step, args=(tpu_features,))

    training_step()
    ```

    NOTE: You should specify `training=True` when using
    `embedding.apply_gradients` as above and `training=False` when not using
    `embedding.apply_gradients` (e.g. for frozen embeddings or when doing
    evaluation).

    For finer grained control, in the above example the line

    ```
      embedding.enqueue(embedding_features, training=True)
    ```

    may be replaced with

    ```
      per_core_embedding_features = self.strategy.experimental_local_results(
          embedding_features)

      def per_core_enqueue(ctx):
        core_id = ctx.replica_id_in_sync_group
        device = strategy.extended.worker_devices[core_id]
        embedding.enqueue(per_core_embedding_features[core_id],
                          device=device)

      strategy.experimental_distribute_values_from_function(
          per_core_queue_inputs)
    ```

    Args:
      features: A nested structure of `tf.Tensor`s, `tf.SparseTensor`s or
        `tf.RaggedTensor`s, with the same structure as `feature_config`. Inputs
        will be downcast to `tf.int32`. Only one type out of `tf.SparseTensor`
        or `tf.RaggedTensor` is supported per call.
      weights: If not `None`, a nested structure of `tf.Tensor`s,
        `tf.SparseTensor`s or `tf.RaggedTensor`s, matching the above, except
        that the tensors should be of float type (and they will be downcast to
        `tf.float32`). For `tf.SparseTensor`s we assume the `indices` are the
        same for the parallel entries from `features` and similarly for
        `tf.RaggedTensor`s we assume the row_splits are the same.
      training: Defaults to `True`. If `False`, enqueue the batch as inference
        batch (forward pass only). Do not call `apply_gradients` when this is
        `False` as this may lead to a deadlock.
       name: A name for the underlying op.
       device: The device name (e.g. '/task:0/device:TPU:2') where this batch
         should be enqueued. This should be set if and only if features is not a
         `tf.distribute.DistributedValues` and enqueue is not being called
         inside a TPU context (e.g. inside `TPUStrategy.run`).

    Raises:
      ValueError: When called inside a strategy.run call and input is not
        directly taken from the args of the `strategy.run` call. Also if
        the size of any sequence in `features` does not match corresponding
        sequence in `feature_config`. Similarly for `weights`, if not `None`.
        If input shapes of features is unequal or different from a previous
        call.
      RuntimeError: When called inside a strategy.run call and inside XLA
        control flow. If batch_size is not able to be determined and build was
        not called.
      TypeError: If the type of any sequence in `features` does not match
        corresponding sequence in `feature_config`. Similarly for `weights`, if
        not `None`.
    zQenqueue is not valid when TPUEmbedding object is not created under a TPUStrategy.zConfigured not to check output shapes on each enqueue() call; please ensure build() was called with output shapes to initialize the TPU for embeddings.Nc                      t        j                  t        j                  d      t        j                  d            } j	                  d|       }t        |       yy)z-Generate enqueue ops for outside compilation.train	inferencer   r  r  N)r   where_v2r   constantr  rI   )r  
enqueue_opr  r  r  rH   r8   rE  s     r9   generate_enqueue_opsz2TPUEmbedding.enqueue.<locals>.generate_enqueue_ops  sn     "**8+6+?+?+H+6+?+?+LN
 ..}R' / )

 

D
) r:   rH  rI  c                    | y t        | t        j                        r%t        j                  | j
                  d      |   S t        | t        j                        r%t        j                  | j
                  d      |   S t        d      )Nr   )num_or_size_splitsaxis)sp_input	num_splitrQ  z'SPMD does not support raggedTensor yet.)rS   r   r   r   splitr`   r   r	  r   sparse_split_v2re   )tsidxr8   s     r9   	_split_fnz'TPUEmbedding.enqueue.<locals>._split_fn  s    :J--.!%!<!<   M667++33  
 DE
Er:   c                 z    j                   | S t        j                  |      }t        j                  ||       S )N)rW  )r`   r   r   r-   map_structure)	ts_inputscore_idsplitterrX  r8   s      r9   _maybe_splitz*TPUEmbedding.enqueue.<locals>._maybe_split/  s:    &&.
&&yg>(##Hi8
8r:   r   rJ  r5  z$Non-TPU device {} passed to enqueue.),rV   r{   r(  r-   r   rX   rn   rY   rm   re   _get_input_shapesr   r   r   rZ   r   r   r   rD  r2  r'   outside_compilationrangerR   r   r   select_replicar`   r^   _tpu_devicesworker_devicesr6  r7  r8  device_indexr   r   r   get_host_for_devicer  rI   r[   r9  rf   )r8   r/  r  rE  rH   r   in_tpu_contextper_replicar   r@  _rN  r  enqueue_opsr^  
replica_idreplica_inputsreplica_weightstpu_devicesr\  
tpu_devicer  rM  device_specrX  r  r  r  s   `  ``                   @@@@r9   enqueuezTPUEmbedding.enqueueT  s   L ?? 8 9 9 IIKNt33X>00  &' 	'
 dNk++
Kl % 

3
3L
AC ,,x(K6C,,L
  !5!5w?\\'*l99$:N:NOM'MJ++KD
--h7* *( ))*>?	!)g{m
 kF 9 dnnAAB +*)889DF*99*:FH &&//<<ZH+00??
KL+ T88=A> 	+G"7+*""..z:GG  zz+99*EF 
+22^W5_g6-]	 3 LJ J-z*
+ 
+	++< "*g{m((44V<k		 	 E	)299&AC 	C ::k55f=> *..}&33' / )
 

D
)* *%
+ 
+$* *s   ;AN;/O;OOrh  rg  c           	      B   g }t        t        j                  |      t        j                  | j                              D ]  \  \  }}}|st        j                  d|      }n|}t        |t        j                        r$|j                  | j                  ||||             bt        |t        j                        r$|j                  | j                  ||||             t        |t        j                         s|j                  | j#                  ||||              |S )z+Get the input shapes from the input tensor.r   )r   r-   r   rZ   rX   r   rb  rS   r   r   r[   _get_input_shape_for_tensorr   r	  "_get_input_shape_for_sparse_tensorr"   r  "_get_input_shape_for_ragged_tensor)	r8   tensorsrh  rg  r   r   maybe_tensorro   r   s	            r9   r_  zTPUEmbedding._get_input_shapesd  s    L),--g6T))**, 
  def _get_input_shapes(self, tensors,
                        in_tpu_context: bool) -> List[TensorShape]:
    """Get the input shapes from the input tensor."""
    input_shapes = []
    for (path, maybe_tensor), feature in zip(
        nest.flatten_with_joined_string_paths(tensors),
        nest.flatten(self._feature_config)):
      if not in_tpu_context:
        maybe_tensor = distribute_utils.select_replica(0, maybe_tensor)
      if isinstance(maybe_tensor, tensor.Tensor):
        input_shapes.append(
            self._get_input_shape_for_tensor(
                maybe_tensor, feature, in_tpu_context, path))
      elif isinstance(maybe_tensor, sparse_tensor.SparseTensor):
        input_shapes.append(
            self._get_input_shape_for_sparse_tensor(
                maybe_tensor, feature, in_tpu_context, path))
      elif isinstance(maybe_tensor, ragged_tensor.RaggedTensor):
        input_shapes.append(
            self._get_input_shape_for_ragged_tensor(
                maybe_tensor, feature, in_tpu_context, path))
    return input_shapes

  def _get_input_shape_for_tensor(self, tensor, feature, in_tpu_context,
                                  path) -> TensorShape:
    """Get the input shape for the dense tensor."""
    shape = tensor.shape.as_list()
    if len(shape) < 1:
      raise ValueError(
          "Only rank 1 and above dense tensor is supported, find rank {} "
          "dense tensor for input {}".format(len(shape), path))
    if len(shape) > 1 and shape[-1] != 1:
      raise ValueError(
          "Rank 2 or above dense tensor should have last dimension as 1 "
          "as the last dimension will always be reduced. "
          "Instead got dense tensor as shape {}".format(shape))
    if self._num_cores_per_replica and in_tpu_context:
      shape[0] = shape[0] * self._num_cores_per_replica
    return TensorShape(shape)

  def _get_input_shape_for_sparse_tensor(self, tensor, feature, in_tpu_context,
                                         path) -> TensorShape:
    """Get the input shape for the sparse tensor."""
    shape = tensor.shape.as_list()
    if len(shape) < 2:
      raise ValueError(
          "Only rank 2 and above sparse tensor is supported, find rank {} "
          "sparse tensor for input {}".format(len(shape), path))
    if (not feature.output_shape and feature.max_sequence_length > 0
        and len(shape) == 2):
      # If max_sequence_length is set and the output shape for the
      # FeatureConfig is not, add the sequence length as the second-to-last
      # dimension of the sparse tensor's shape.
      shape.insert(len(shape) - 1, feature.max_sequence_length)
    if self._num_cores_per_replica and in_tpu_context and shape[0]:
      shape[0] = shape[0] * self._num_cores_per_replica
    return TensorShape(shape)

  def _get_input_shape_for_ragged_tensor(self, tensor, feature, in_tpu_context,
                                         path) -> TensorShape:
    """Get the input shape for the ragged tensor."""
    del in_tpu_context  # Unused; ragged inputs are not split under SPMD.
    shape = tensor.shape.as_list()
    if len(shape) != 2:
      raise ValueError(
          "Only rank 2 ragged tensor is supported, find rank {} "
          "ragged tensor for input {}".format(len(shape), path))
    if not feature.output_shape and feature.max_sequence_length > 0:
      # If max_sequence_length is set and the output shape for the
      # FeatureConfig is not, add the sequence length as the second-to-last
      # dimension of the ragged tensor's shape.
      shape.insert(len(shape) - 1, feature.max_sequence_length)
    return TensorShape(shape)

  def _update_output_shapes(self,
                            incoming_output_shapes: List[TensorShape]):
    """Update the existing output shapes based on the new output shapes.

    The existing output shapes always have higher priority than the new
    incoming output shapes.

    Args:
      incoming_output_shapes: nested structure of TensorShape to override the
        existing output shapes.
    """
    nest.assert_same_structure(self._output_shapes, incoming_output_shapes)
    updated_output_shapes = []
    for old_output_shape, incoming_output_shape in zip(
        self._output_shapes, incoming_output_shapes):
      if old_output_shape:
        updated_output_shapes.append(old_output_shape)
      else:
        updated_output_shapes.append(incoming_output_shape)
    self._output_shapes = updated_output_shapes

  def _check_output_shapes(self, incoming_output_shapes: List[TensorShape]):
    """Check the incoming output shapes against the output shapes stored."""
    # The incoming output shapes should have the same structure as the
    # existing output shapes.
    nest.assert_same_structure(self._output_shapes, incoming_output_shapes)
    for (path, _), old_output_shape, incoming_output_shape in zip(
        nest.flatten_with_joined_string_paths(self._feature_config),
        self._output_shapes, incoming_output_shapes):
      # Only compare when both shapes are set and the incoming shape carries
      # more than the batch dimension.
      if (old_output_shape and incoming_output_shape and
          len(incoming_output_shape) > 1):
        if (len(old_output_shape) != len(incoming_output_shape) or
            not self._is_tensor_shape_match(old_output_shape,
                                            incoming_output_shape)):
          raise ValueError(
              f"Inconsistent shape found for input feature {path}, "
              f"Output shape is set to be {old_output_shape}, "
              f"But got incoming output shape {incoming_output_shape}")

  def _check_output_shapes_fully_defined(self):
    """Check if the output shape is fully defined."""
    for (path, _), output_shape in zip(
        nest.flatten_with_joined_string_paths(self._feature_config),
        self._output_shapes):
      if not output_shape.is_fully_defined():
        raise ValueError(
            f"Input Feature {path} has output shape set as {output_shape} "
            "which is not fully defined. Please specify the fully defined "
            "shape in either FeatureConfig or for the build method.")

  def _is_tensor_shape_match(self, shape_a: TensorShape,
                             shape_b: TensorShape) -> bool:
    """Check if shape b matches with shape a."""
    for s_a, s_b in zip(shape_a.as_list(), shape_b.as_list()):
      if s_a and s_b and s_a != s_b:
        return False
    return True

  def _get_output_shapes_from_batch_size(self, per_replica_batch_size):
    """Get the output shapes from the batch size."""
    output_shapes = []
    for feature in nest.flatten(self._feature_config):
      if not feature.output_shape and feature.max_sequence_length > 0:
        output_shapes.append(
            TensorShape(
                [per_replica_batch_size, feature.max_sequence_length]))
      else:
        output_shapes.append(TensorShape(per_replica_batch_size))
    return output_shapes

  def _create_copy_for_async_checkpoint(self, feature_config, optimizer,
                                        pipeline_execution_with_tensor_core):
    """Create a TPUEmbedding copy for checkpoint/async_checkpoint_helper.py."""
    return TPUEmbedding(
        feature_config=feature_config,
        optimizer=optimizer,
        pipeline_execution_with_tensor_core=(
            pipeline_execution_with_tensor_core))
                  j                         d|j                  t        |      || d||j                      d} K 	 ddd        y# 1 sw Y   xY w)a  Load embedding tables to onto TPU for each table and host.

  Args:
    config: A serialized TPUEmbeddingConfiguration proto.
    hosts: A list of CPU devices, on per host.
    variables: A dictionary of dictionaries of TPUEmbeddingVariables. First key
      is the table name, second key is 'parameters' or the optimizer slot name.
    table_config: A list of tf.tpu.experimental.embedding.TableConfig objects.
  c                       fd}|S )Nc                     t        | j                        k\  r"t        j                  | j                  d         S | j                     S r5   )r   r!   r   
zeros_like)r   host_ids    r9   select_or_zerosz@_load_variables_impl.<locals>.select_fn.<locals>.select_or_zeros'  s>    	C$	$
 ##AKKN33[[!!r:   r@   )r  r  s   ` r9   	select_fnz'_load_variables_impl.<locals>.select_fn%  s    " r:   
table_name
num_shardsshard_idr   Nr@   )	rc   r   r   r-   rZ  rM   _loadrH   r   )	r   r  r!   r  r  r  hosthost_variablesrb   s	            r9   r   r     s     !' mgt	D	 )))G*<iHn % 	*zz5z		*
 UZZ(	*   s   A+B!!B*	c           	         t        |      D ]  \  }}t        j                  |      5  |D ]  } |j                  j	                         |j
                  t        |      ||       }t        |t              s|f}t        dg|j                  j                         z         D ]Q  \  }}	||j
                     |	   }
|t        |
j                        k  s1|
j                  |   j                  ||          S d}  	 ddd        y# 1 sw Y   xY w)a  Retrieve embedding tables from TPU to host memory.

def _retrieve_variables_impl(
    config: Text,
    hosts: List[Text],
    variables: Dict[Text, Dict[Text, tf_variables.Variable]],
    table_config: List[tpu_embedding_v2_utils.TableConfig]) -> None:
  """Retrieve embedding tables from TPU to host memory.

  Args:
    config: A serialized TPUEmbeddingConfiguration proto.
    hosts: A list of all the host CPU devices.
    variables: A dictionary of dictionaries of TPUEmbeddingVariables. First key
      is the table name, second key is 'parameters' or the optimizer slot name.
    table_config: A list of tf.tpu.experimental.embedding.TableConfig objects.
  """
  for host_id, host in enumerate(hosts):
    with ops.device(host):
      for table in table_config:
        retrieved = table.optimizer._retrieve()(  # pylint: disable=protected-access
            table_name=table.name,
            num_shards=len(hosts),
            shard_id=host_id,
            config=config)
        # When there are no slot variables (e.g. with SGD) this returns a
        # single tensor rather than a tuple; normalize to a tuple so the
        # loop below is uniform.
        if not isinstance(retrieved, tuple):
          retrieved = (retrieved,)

        for i, slot in enumerate(
            ["parameters"] + table.optimizer._slot_names()):  # pylint: disable=protected-access
          # Assign the CPU variables the values returned from the TPU.
          sharded_var = variables[table.name][slot]
          if host_id < len(sharded_var.variables):
            # In the edge case where we have more hosts than variable
            # shards, we skip the later hosts.
            sharded_var.variables[host_id].assign(retrieved[i])
        # Ensure that only the first table/first host gets a config.
        config = None


def _save_callback(trackables, **unused_kwargs):
  for trackable in trackables.values():
    trackable._retrieve_variables()  # pylint: disable=protected-access
  return []


def _restore_callback(trackables, **unused_kwargs):
  for trackable in trackables.values():
    trackable._load_variables()  # pylint: disable=protected-access


registration.register_tf_checkpoint_saver(
    "TPUEmbeddingCallback",
    predicate=lambda x: isinstance(x, TPUEmbedding),
    save_fn=_save_callback,
    restore_fn=_restore_callback,
    # Set strict_predicate_restore to False because the isinstance predicate
    # does not pass after a TPUEmbedding object is loaded from SavedModel.
    strict_predicate_restore=False,
)

def extract_variable_info(
    kwargs) -> Tuple[Text, Tuple[int, ...], dtypes.DType, Callable[[], Any]]:
  """Extracts the variable creation attributes from the kwargs.

  Args:
    kwargs: a dict of keyword arguments that were passed to a variable creator
      scope.

  Returns:
    A tuple of variable name, shape, dtype, initialization function.
  """
  if (isinstance(kwargs["initial_value"], functools.partial) and
      ("shape" in kwargs["initial_value"].keywords or
       kwargs["initial_value"].args)):
    # Sometimes shape is passed positionally, sometimes it's passed as a
    # kwarg.
    if "shape" in kwargs["initial_value"].keywords:
      shape = kwargs["initial_value"].keywords["shape"]
    else:
      shape = kwargs["initial_value"].args[0]
    return (kwargs["name"], shape,
            kwargs["initial_value"].keywords.get("dtype", kwargs["dtype"]),
            kwargs["initial_value"].func)
  elif ("shape" not in kwargs or kwargs["shape"] is None or
        not callable(kwargs["initial_value"])):
    raise ValueError(
        "Unable to extract initializer function and shape from {}. Please "
        "either pass a function that expects a shape and dtype as the "
        "initial value for your variable or functools.partial object with "
        "the shape and dtype kwargs set. This is needed so that we can "
        "initialize the shards of the ShardedVariable locally.".format(
            kwargs["initial_value"]))
  else:
    return (kwargs["name"], kwargs["shape"], kwargs["dtype"],
            kwargs["initial_value"])

def make_sharded_variable_creator(
    hosts: List[Text]) -> Callable[..., TPUEmbeddingVariable]:
  """Makes a sharded variable creator given a list of hosts.

  Args:
    hosts: a list of tensorflow devices on which to shard the tensors.

  Returns:
    A variable creator function.
  """

  def sharded_variable_creator(
      next_creator: Callable[..., tf_variables.Variable], *args, **kwargs):
    """The sharded variable creator."""
    kwargs["skip_mirrored_creator"] = True

    num_hosts = len(hosts)
    name, shape, dtype, unwrapped_initial_value = extract_variable_info(kwargs)
    initial_value = kwargs["initial_value"]
    rows = shape[0]
    cols = shape[1]
    partial_partition = rows % num_hosts
    full_rows_per_host = rows // num_hosts
    # Partition the rows across hosts: the first partial_partition hosts get
    # one more row than the minimum number of rows per shard.
    partitions = ([full_rows_per_host + 1] * partial_partition +
                  [full_rows_per_host] * (num_hosts - partial_partition))
    variables = []
    sharding_aware = "shard_info" in tf_inspect.getargspec(initial_value).args

    # Keep track of the row offset for sharding-aware initializers.
    offset = 0
    kwargs["dtype"] = dtype
    for i, p in enumerate(partitions):
      if p == 0:
        # Skip variable creation for empty partitions, which arise when
        # there are fewer rows than hosts.
        continue
      with ops.device(hosts[i]):
        kwargs["name"] = "{}_{}".format(name, i)
        kwargs["shape"] = (p, cols)
        if sharding_aware:
          shard_info = base.ShardInfo(kwargs["shape"], (offset, 0))
          kwargs["initial_value"] = functools.partial(
              initial_value, shard_info=shard_info)
          offset += p
        else:
          kwargs["initial_value"] = functools.partial(
              unwrapped_initial_value, kwargs["shape"], dtype=dtype)
        variables.append(next_creator(*args, **kwargs))
    return TPUEmbeddingVariable(variables, name=name)

  return sharded_variable_creator