
    BVhkM                       d Z ddlZddlZddlZddlZddlZddlZddlmZm	Z	m
Z
mZmZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl!m#Z$ ddl!m%Z% ddl!m&Z& ddl!m'Z' ddl!m(Z( ddl!m)Z) ddl*m+Z+ ddl*m,Z, ddl*m-Z- ddl*m.Z. ddl*m/Z/ ddl*m0Z0 ddl*m1Z2 ddl3m4Z4 ddl5m6Z6 ddl5m7Z7 dd l5m8Z8 dd!l5m9Z9 dd"l5m:Z: dd#l5m;Z; dd$l<m=Z> dd%l?m@Z@ dd&lAmBZB dd'lCmDZD dd(lCmEZE dd)lCmFZF dd*lGmHZH d+ZId,ZJd-ZKe8j                  ZLe8j                  ZMe8j                  ZN eHd.       ej                  d/0       G d1 d2                    ZP G d3 d4e,j                        ZR G d5 d6eBj                        ZTd7 ZUej                   G d8 d9             ZV ej                  d:       G d; d<ej                  ej                               ZZ ej                  d=g d>      Z\d? Z]	 dQd@eeL   dAeeeeMf      dBe^dCe^dDeeP   dEeVfdFZ_ eHdG       G dH dIe7j                               ZadJedEeebee^dKf   e%j                  e	g ef   ee^   f   fdLZddMedEeefdNZfdOej                  dEe	dKef   fdPZhy)Rz?Mid level API for TPU Embeddings With V2 Embedding Accelerator.    N)	AnyCallableDictIterableListOptionalSequenceTupleUnion)logging)attr_value_pb2)sparse_core_layout_pb2)saveable_compat)device_util)distribute_lib)tpu_strategy)tpu_util)
tpu_values)values)values_util)constant_op)device)dtypes)ops)sparse_tensor)tensor)tensor_shape)	array_ops)control_flow_ops)gen_resource_variable_ops)math_ops)summary_ops_v2)variable_scope)	variables)ragged_tensor)_pywrap_sparse_core_layout)tpu_embedding_base)tpu_embedding_v2_utils)#tpu_embedding_v3_checkpoint_adapter)tpu_embedding_v3_utils)tpu_replication)gen_xla_ops)base)saveable_object)compat)nest)
tf_inspect)	tf_export_embedding_pipeliningforwardbackwardz4tpu.experimental.embedding.SparseCoreEmbeddingConfigT)frozenc                       e Zd ZU dZdZeed<   dZeed<   dZ	e
eeef      ed<   dZe
eeef      ed<   dZeed	<   d
Zeed<   dZeed<   y)SparseCoreEmbeddingConfigz Config for sparsecore embedding.Fdisable_table_stacking@   max_ids_per_chip_per_sampleNmax_ids_per_tablemax_unique_ids_per_tableallow_id_droppingTinitialize_tables_on_host enable_fast_table_initialization)__name__
__module____qualname____doc__r9   bool__annotations__r;   intr<   r   r   strr=   r>   r?   r@        V/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/tpu/tpu_embedding_v3.pyr8   r8   M   sn     )!&$&%'s'04Xd38n-47;HT#s(^4;!T!$(T(+0"D0rJ   r8   c                   d     e Zd ZdZdedef fdZ	 d
dedef fdZde	j                  fd	Z xZS )EmbeddingPipeliningContextzISets the _embedding_pipelining attribute on all ops created in the scope.modeenablec                    t         |           d| _        t        j                  t        j                  |            | _        t        j                         }|r|rt        j                  d       d| _        y || _        y )NEmbeddingPipelinigContext)sz`Embedding pipelining requested but summaries are being recorded: Disabling embedding pipelining.F)super__init___namer   	AttrValuer/   as_bytes_moder"   is_recording_summariesr   info_enable)selfrN   rO   recording_summaries	__class__s       rK   rT   z#EmbeddingPipeliningContext.__init__^   sf    	G,DJ))FOOD,ABDJ(??A%ll- dldlrJ   context_defexport_scopec                 &    t         |   ||       y N)rS   to_control_flow_context_def)r\   r_   r`   r^   s      rK   rc   z6EmbeddingPipeliningContext.to_control_flow_context_defl   s    
 
G'\BrJ   opc                     | j                   r |j                  t        | j                         | j                  r| j                  j                  |       y y rb   )r[   	_set_attr_PIPELINE_ATTRIBUTErX   _outer_contextAddOp)r\   rd   s     rK   ri   z EmbeddingPipeliningContext.AddOps   s>    ||ll&

3
# rJ   rb   )rA   rB   rC   rD   rH   rE   rT   r   rc   r   	Operationri   __classcell__r^   s   @rK   rM   rM   [   sF    Q3   37CC,/C$cmm $rJ   rM   c            
            e Zd ZdZdej
                  dedededef
 fdZde	e
j                     d	e	ej                     d
efdZ xZS )TPUEmbeddingShardedSaveablezIDefines how to save and restore a shard of TPUEmbedding sharded variable.variableshard_id
num_shards	shard_dimnamec                    || _         || _        dgt        |j                        z  }||j                  |   z  ||<   |j                  j	                         }|||   z  ||<   t
        j                  j                  ||||j                  j	                               }t        j                  |j                  |j                  ||j                  |j                        }	t        
| =  |j                  |	g|       y)z!Init TPUEmbeddingShardedSaveable.r   )	full_name
full_shape
var_offset	var_shape)r   
slice_specrs   dtyper   N)	_shard_id	_variablelenshapeas_listtf_variablesVariableSaveSliceInfor.   SaveSpec
read_valuespecrz   r   rS   rT   )r\   ro   rp   rq   rr   rs   rw   	fullshapesave_slice_infor   r^   s             rK   rT   z$TPUEmbeddingShardedSaveable.__init__~   s     DNDNs8>>**J$x~~i'@@Jy&&(I%	)(<<Ii"++99..((*	 : O ##"""''nnD 
GX((4&$7rJ   restored_tensorsrestored_shapesreturnc                 x    ~|d   }t        j                  | j                  j                  | j                  |      S Nr   )r   assign_on_devicer|   r   )r\   r   r   restored_tensors       rK   restorez#TPUEmbeddingShardedSaveable.restore   s9    
 	&q)O''t~~ rJ   )rA   rB   rC   rD   r   r   rG   rH   rT   r   r   Tensorr   TensorShaper   r   rk   rl   s   @rK   rn   rn   {   sw    Q8%%8 8 	8
 8 8B
V]]+
 L445
 	
rJ   rn   c                  6    t        j                  t              S )N)default_factory)dataclassesfielddictrI   rJ   rK   
_fielddictr      s    			4	00rJ   c                   d   e Zd ZU dZ e       Zeeef   e	d<    e       Z
eeef   e	d<    e       Zeeef   e	d<    e       Zeeef   e	d<    e       Zeeef   e	d<    e       Zeeef   e	d<    e       Zeeej&                  f   e	d<    e       Zeeeeeef   f   e	d	<    e       Zeeef   e	d
<   y)TableStackingz&Information about how we stack tables.stacked_table_to_tablesquantization_configstable_name_to_tabletable_to_padding_rowstable_to_padding_columnstable_to_sample_counttable_to_layouttable_to_stacked_table_offsetfeature_to_sample_offsetN)rA   rB   rC   rD   r   r   r   rH   TableConfigrF   r   QuantizationConfigr   r   rG   r   r   r   r   SparseCoreTableLayoutr   r
   r   rI   rJ   rK   r   r      s    . 5?L4[ 01@8BS"445D 1;tC,-<*4,c3h6-7\DcN9*4,c3h6l 43IIIJ  DN<c5c3+?&?!@O .8\DcN9rJ   r    c                        e Zd ZdZdefdZedefd       Zede	j                  fd       Z fdZdeeedef   f   fdZdd	Zdefd
Z	 	 	 ddededee   dedef
dZd Zd Z xZS )TPUEmbeddingShardedVariablez4A ShardedVariable class for Embedding tables on TPU.r   c                      y)NFrI   r\   s    rK   _is_mirroredz(TPUEmbeddingShardedVariable._is_mirrored   s    rJ   c                      yr   rI   r   s    rK   rr   z%TPUEmbeddingShardedVariable.shard_dim   s    rJ   c                     | j                   d   j                  }|j                         }|| j                     t	        | j
                        z  || j                  <   t        j                  |      S )zDReturns the shape of the embedding variable for the current context.r   )_valuesr~   r   rr   r}   r   r   r   )r\   local_shapeglobal_shapes      rK   r~   z!TPUEmbeddingShardedVariable.shape   sc     ,,q/''K&&(L#/#?#C $L  ##L11rJ   c                     t         |   ||       |j                  j                  j	                  | j
                  d   j                  j                                y r   )rS   _write_object_protoro   r~   CopyFromr   as_proto)r\   protooptionsr^   s      rK   r   z/TPUEmbeddingShardedVariable._write_object_proto   sC    	Gw/ 
NN!!$,,q/"7"7"@"@"BCrJ   .c                 J      j                   f fd	}t        j                  |iS )zxOverrides Trackable method.

    Returns:
      A dictionary mapping attribute names to `SaveableObject` factories.
    c           
          g }t        j                        }t        |      D ]7  }|j                  t	        j                  |   ||j
                  |              9 |S rb   )r}   r   rangeappendrn   rr   )rs   	saveablesrq   rp   r\   s       rK   _saveable_factoryzWTPUEmbeddingShardedVariable._gather_saveables_for_checkpoint.<locals>._saveable_factory   sd    it{{#jJ' 	
('H%	
	
 rJ   )_common_namer-   VARIABLE_VALUE_KEY)r\   r   s   ` rK    _gather_saveables_for_checkpointz<TPUEmbeddingShardedVariable._gather_saveables_for_checkpoint   s'      $00  ##%677rJ   c                     t        j                         | j                  d   j                         S | j	                         S )z Converts a variable to a tensor.r   )r   enclosing_tpu_contextr   r   _read_variable_op)r\   rz   rs   as_refs       rK   _dense_var_to_tensorz0TPUEmbeddingShardedVariable._dense_var_to_tensor   s9     %%'/\\!_''))##%%rJ   c                 `    t        j                         t        d      | j                         S )NzRReading in cross replica mode is not yet supportedfor TPUEmbeddingShardedVariable.)r   r   NotImplementedErrorr   r   s    rK   r   z&TPUEmbeddingShardedVariable.read_value  s4    %%'/- 
 ##%%rJ   valueuse_lockingrs   r   c                 H   t        j                         U| j                  j                  j                  D ]2  }t        j                  |      5  | j                  ||       d d d        4  t        j                  t        j                        | ||||      S # 1 sw Y   lxY w)N)r   r   rs   r   )r   r   distribute_strategyextendedworker_devicesr   r   r   make_raw_assign_fnr    assign_variable_op)r\   r   r   rs   r   r   s         rK   assignz"TPUEmbeddingShardedVariable.assign  s     %%'/,,55DD /&ZZ 	/



.	/ 	//8&&!44 	 	/ 	/s   BB!	c                     | j                   t        d      t        j                  |      5  t	        j
                  | j                   j                  |       d d d        y # 1 sw Y   y xY w)N Required packed variable support)resourcer   )_packed_varr   r   r   r    r   handle)r\   r   r   s      rK   r   z,TPUEmbeddingShardedVariable.assign_on_device%  sZ     BCC	F	 22##**%  s   ,A""A+c                     | j                   t        d      t        j                  |      5  t	        j
                  | j                   j                  | j                        cd d d        S # 1 sw Y   y xY w)Nr   )r   rz   )r   r   r   r   r    read_variable_opr   rz   )r\   r   s     rK   read_from_devicez,TPUEmbeddingShardedVariable.read_from_device-  s^     BCC	F	 &77##**$**  s   5A,,A5)NNF)FNT)rA   rB   rC   rD   rE   r   propertyrG   rr   r   r   r~   r   r   rH   r   r   r   r   r   r   r   r   r   rk   rl   s   @rK   r   r      s     =D     2\-- 2 2D8S(38:L5L0M 80&&# &     SM	
  ,rJ   r   PartitionedCsrFormatTensorrow_pointerssorted_sample_idssorted_token_idssorted_gainssample_count(num_minibatches_per_physical_sparse_corec                 R   i }g }t        j                  |       D ]v  }t        j                  |      }|j                  |vr,t        j                  |j                        ||j                  <   ||j                     |_        |j	                  |       x t        j
                  | |      S rb   )r0   flattencopytabler   pack_sequence_as)feature_configold_to_new_tablenew_featuresold_featurefeatures        rK   _clone_feature_configr   D  s    ,\\.1 !kii$G}},,(,		'--(@w}}%$W]]3GM ! 
		~|	<<rJ   table_configflat_featuresnum_partitionsnum_sc_per_partitionsparse_core_embedding_configr   c           
      	   t        j                  dt        |              d}|r|j                  }|rt        j                  d       t        j                  |||      }t               }| D ci c]  }|j                  | c}|_	        | D ci c]  }|j                  d }	}|D ]8  \  }
}|	|j                  j                  xx   t        j                  t        j                  |j                        z  cc<   t!        | d       }|D ]  }|j"                  rt%        |j&                        t)        |j*                        f}t-        j.                  t)        |      j1                         d      j3                         }d	|z   }|j5                  |j                  |j6                  |j8                  ||	|j                     
        |j;                         j<                  D ]0  }|j                  |j>                     }|j"                  rJ ||_        2 tA        jB                  tD              }|D ]`  }|j"                  }|j>                  |j                  k(  sJ ||jF                  |j                  <   ||jH                     jK                  |       b |jM                         D ]^  \  }}|d   j*                  |jN                  |<   ||jP                  |<   t        jR                  dd|       |D ]  }|j"                  }t        jR                  dd|j                  |jT                  |jV                         ||jT                  |z  |z  |jV                  f|jX                  |j                  <   |jZ                  d   |_        |jZ                  d   |_        |jZ                  d   |j\                  d   z
  |j^                  |j                  <   |jZ                  d   |j\                  d   z
  |j`                  |j                  <    a t        j                  dt        |jP                               |jP                  D ci c]  }|d c}|_1        |D ]  \  }}|jX                  |j                  j                     d   }|jb                  |   |jd                  |<   |jb                  |xx   t        j                  t        j                  |j                        z  cc<    |c S  yc c}w c c}w c c}w )z3Stack tables with the same table dim and optimizer.z&Number of tables before stacking is %dFzTable stacking is disabled.)r   sparse_cores_per_partitionr9   r   c                     | j                   S rb   )rs   )ts    rK   <lambda>zA_stack_tables_with_same_table_dim_and_optimizer.<locals>.<lambda>q  s
    qvv rJ   )key)usedforsecurity_xxtpuv3internal_)
table_nametable_heighttable_widthgroupoutput_samples   zStacked table name: %sz"  Table %s: offset %d, rotation %dz&Number of tables after stacking is %d.N)3r   rZ   r}   r9   warnr&   SparseCoreLayoutStackerr   rs   r   r   	functoolsreduceoperatormuloutput_shapesortedlayouthash	optimizerreprquantization_confighashlibsha1encode	hexdigestAddTablevocabulary_sizedim
GetLayoutstablesr   collectionsdefaultdictlistr   stacked_table_namer   itemsr   r   vlogsparse_core_shard_row_offsetsparse_core_shard_rotationr   unsharded_padded_shapeunsharded_shaper   r   r   r   )r   r   r   r   r   r9   stackerrR   r   table_to_num_samples_r   sorted_tables	key_tuplekey_strr   r  tables_by_stack
stack_namer  r   feature_pathr  s                          rK   /_stack_tables_with_same_table_dim_and_optimizerr(  R  s    
,,7\9JK !9PPLL./&>>#!53'
 o!:FG5::u,G!5ABE%**a-BB! mja++,	0@0@g**1 , <-=>M 
\\ !**+	
	 ,,O""$!
 )+ 	 "G+zz..		/

; 	 	
/
H $$&-- ##F$5$56eel "--d3O ?||f%**,,,&,a

#f//077>	? .335 

F+1!9+H+HaZ(.4a
+ll1.
; 
%0JJ//--	
 //"# --7
''

3 !' = =a @11!4	))!,v/E/Ea/HH 	


+ ))!,v/E/Ea/HH 	
""5::.-

> LL0A%%& )*(A(A$
AA "/ 	g::7==;M;MN
 231H1H
2a  . 01Y5E5E
,,,,6 1	 H[m HB@s   -S"S';
S,z)tpu.experimental.embedding.TPUEmbeddingV2c                       e Zd ZdZdZdZ	 	 	 dNdeej                  e	f   de
ej                     dede
e   f fdZded	eeef   d
ee   dededej(                  fdZdedej(                  deeef   fdZdee   ded	eeef   dej2                  deeeeeej8                  f      f   f
dZd Zedeej                  e jB                  f   fd       Z"edeej                  ee jB                     f   fd       Z#edeee$jJ                  f   fd       Z&edeej                  eee jB                  f   f   fd       Z'deej                     dedeee jB                  f   fdZ(deeeee jB                  f   f   fdZ)dOdZ*d Z+d Z,d  Z-d!e.d"eee/f   fd#Z0	 dPd$e.d%e
e.   de1e.eee/f   f   fd&Z2d' Z3e4	 	 	 dQd$e.deee	f   d(ed)ede
ej                     de
e   de1e.e.f   fd*       Z5	 	 dRd$e.d%e
e.   d+e
e   de.fd,Z6d-eee.f   de.fd.Z7d-e1eee/f   eef   de1e.eee/f   f   fd/Z8	 dPd$e.d%e
e.   de1e.eee/f   f   fd0Z9	 dPd1ed2ed3ed)ed4ed5eee.f   d6eee1eeef   f   d7eeef   d8eeef   d9e.d:e.d;e
e.   de.fd<Z:e4d=eej8                  e;jx                  e=j|                  f   d>e
ej8                     dej                  d?ed@edAedBed)ed4edCede.fdD       Z?e4d:e.d;e.d9e.d5eee.f   d6eee1eeef   f   d8eeef   d)edEeeef   d4edeee.f   fdF       Z@d1edGeee.f   d5eee.f   d)ede1eee.f   eej8                     f   f
dHZAd1ed2ed3edIeee.f   d5eee.f   d7eeef   d)ede.fdJZBdKeeej8                  f   fdLZCdM ZD xZES )STPUEmbeddingV2zKThe TPUEmbedding mid level API running on TPU with sparse core accelerator.   r   r	  #pipeline_execution_with_tensor_corer   c                 d   t         |   t        |      |       t        j                         | _        t        | j
                  t        j                  t        j                  f      s$t        dj                  | j
                              | j                  D ]H  }|j                  j                  |j                  j                  	 1t!        d|j"                   d       | j
                  j$                  j&                  j(                  | _        | j*                  dk(  rt-        j.                  d       d| _        | j
                  j0                  | j*                  z  | _        t5        j6                  | j8                        | _        |0t=               | _        t-        j.                  d| j>                         n|| _        tA        | j                  | j:                  | j
                  j0                  | j*                  | j>                        | _!        | jB                  jD                  | _#        | jB                  jH                  | _%        | jB                  jL                  | _'        | jB                  jP                  | _)        | jB                  jT                  | _+        | jB                  jX                  | _-        | jB                  j\                  | _/        | jB                  j`                  | _1        | j>                  jd                  | _2        d	| _3        i | _4        i | _5        | jm                          || _7        y)
a  Creates the TPUEmbeddingV2 mid level API object.

    Args:
      feature_config: A nested structure of
        `tf.tpu.experimental.embedding.FeatureConfig` configs.
      optimizer: An instance of one of `tf.tpu.experimental.embedding.SGD`,
        `tf.tpu.experimental.embedding.Adagrad` or
        `tf.tpu.experimental.embedding.Adam`. When not created under TPUStrategy
        may be set to None to avoid the creation of the optimizer slot
        variables, useful for optimizing memory consumption when exporting the
        model for serving where slot variables aren't needed.
      pipeline_execution_with_tensor_core: If True, the TPU embedding
        computations will overlap with the TensorCore computations (and hence
        will be one step old). Set to True for improved performance.
      sparse_core_embedding_config: Configs for sparse core embedding including
        settings for table stacking, input feature static buffer size etc.

    Raises:
      ValueError: If optimizer is not one of tf.tpu.experimental.embedding.(SGD,
      Adam or Adagrad) or None when created under a TPUStrategy.
      RuntimeError: If not created under TPUStrategy.
    z@TPUEmbeddingV2 should be created under TPUStrategy but found {}.Nzpweight_decay_factor and multiply_weight_decay_factor_by_learning_rate are not supported yet. But found in table z	 setting.r   zYNo embedding devices per chip info is found. Using 4 as the default value for SparseCore.   zBSparseCoreEmbeddingConfig is not provided. Using default values %sr:   )8rS   rT   r   r   get_strategy	_strategy
isinstancer   TPUStrategyTPUStrategyV2RuntimeErrorformat_table_configr	  weight_decay_factor-multiply_weight_decay_factor_by_learning_rater   rs   r   tpu_hardware_featurenum_embedding_devices_per_chip_num_sc_per_chipr   warningnum_replicas_in_sync_num_sc_shardsr0    flatten_with_joined_string_paths_feature_config_flat_featuresr8   _sparse_core_embedding_configr(  _sr   _table_name_to_tabler   _stacked_table_to_tablesr   _table_to_padding_columnsr   _table_to_padding_rowsr   _table_to_stacked_table_offsetr   _table_to_sample_countr   _feature_to_sample_offsetr   _quantization_configsr;   max_minibatches_per_sc!_table_to_max_ids_per_sparse_core(_table_to_max_unique_ids_per_sparse_core4_update_sparse_core_buffer_size_after_table_stacking_pipelining)r\   r   r	  r,  r   r   r^   s         rK   rT   zTPUEmbeddingV2.__init__  s   @ 
G*>:IF#002DN11<3M3MN 
L6$..!  ## 


//
-
-
9__JJ "((-

|9>
 	


 	44SS 	 !oo#  d 	++d.C.CC 	 ??D $++D+Fd(oo
N

,
,
 ,Hd(=++**DG !% ; ;D$(GG$C$CD!%)WW%E%ED""&''"?"?D*.''*O*OD'"&''"?"?D%)WW%E%ED"!%!=!=D
 	**FF 	$ #%D-/D*46D1==?:DrJ   r   partition_shapepartition_offsettotal_vocab_sizesc_idxr   c                     |j                   |d   z  |z  | j                  z  }|j                   |d   z  |z  ||z  z   }t        j                  ||j                  g|dg      S r   )r  r;  r-   	ShardInfor  )r\   r   rQ  rR  rS  rT  sc_shard_sizesc_shard_offsets           rK   _compute_sc_shard_infoz%TPUEmbeddingV2._compute_sc_shard_infoU  s     	
!
		   	!  	
1
		 		O >>=%))46JKKrJ   r   
shard_infoc                 *   | j                   j                  j                  }|j                  \  }}||z  }| j                  j
                  |   d   }|j                  d   |j                  d   z  }||z
  | j                  z  }|| j                  z  }	||	fS )N   r   )	r0  r   _tpu_devicesr~   rC  r   offsetr>  r;  )
r\   r   rZ  tpu_devicesnum_replicasnum_cores_per_replicanum_devicesshiftshard_indexnum_scs
             rK    _compute_sc_shard_idx_and_offsetz/TPUEmbeddingV2._compute_sc_shard_idx_and_offsetm  s    
 ..))66K*5*;*;'L'!66KGG11*=a@E##A&**:*:1*==K&$*=*==K4000FrJ   stacked_tablesrz   c                 p   i }| j                   j                  j                  }|j                  \  }}||z  }	|d   |	z  |d   f}dgt	        |      z  }
t        |      D ]  }t        |      D ]  }t        j                  j                  ||   |         j                  dd      j                         }||z  |z   |d   z  }g ||<   t        | j                        D ]B  }i }||   j                  i        |D ]#  }t        j                  |j                        }d|j                   v xs d|j"                  v }| j$                  j&                  sX|r[||
d<   | j)                  |||
||      }| j+                  |j,                  |      \  }}t/        j0                  |      5  |j,                  |vr6|j                  |j2                  |j4                  f|      ||j,                  <   ||j,                     |d |d d f   }|||   |   |j,                  <   d d d        & E   |S # 1 sw Y   <xY w)Nr   r   CPU)device_typedevice_indexrZ  r~   rz   )r0  r   r]  r~   r}   r   	tf_device
DeviceSpecfrom_stringreplace	to_stringr;  r   r1   getfullargspecinitializerargs
kwonlyargsrB  r?   rY  rf  rs   r   r   r  r  )r\   rg  rS  rQ  rz   cpu_table_tensorsr_  r`  ra  rb  rR  ridcid
device_cpushard_dim_offsetifull_tablesr   arg_specsharding_awaresc_shard_inford  shard_offsetsc_shards                           rK   _host_table_initializerz&TPUEmbeddingV2._host_table_initializer~  s    ..))66K*5*;*;'L'!66K&q)[8/!:LMOsS11\" 4N,- 3N#  ,,[-=c-BCWQW7Y[ 	 ((C/A /1*+t,,- '	NA +
,
-
4
4R
8% !Ne!001B1BCH- 78#6#66  22LL&
 %5q!"99!""m +/*O*O**m+'k< ::j) N::[0,1,=,="22EII>! -> -+ejj) 'uzz2;3L3La3OPEM!"23A6uzzBN N5!N'	N3N4Nl N Ns   "A3H++H5c                 :   | j                   D ]  }| j                  j                  || j                  j                  vr0t        j                  d|       | j
                  | j                  |<   n&| j                  j                  |   | j                  |<   | j                  j                  || j                  j                  vr0t        j                  d|       | j                  | j                  |<   | j                  j                  |   | j                  |<    y)z8Update the sparse core buffer size after table stacking.NzjTable %s is not found in max_ids_per_table provided by SparseCoreEmbeddingConfig. Using default value 256.zqTable %s is not found in max_unique_ids_per_table provided by SparseCoreEmbeddingConfig. Using default value 256.)
rE  rB  r<   r   r<  DEFAULT_MAX_IDS_PER_TABLErM  r=    DEFAULT_MAX_UNIQUE_IDS_PER_TABLErN  )r\   r   s     rK   rO  zCTPUEmbeddingV2._update_sparse_core_buffer_size_after_table_stacking  s#   33 &



,
,
>
>
F33EEF 	C	
 ** 	..z:
 ..@@L 	..z: 
,
,
E
E
M33LLM 	G 	
 11 	55jA
 ..GG 	55jAE&
rJ   c                     | j                          | j                  D ci c]  }|| j                  |   d    c}S c c}w );Returns a dict of embedding tables, keyed by `TableConfig`.
parameters)_maybe_buildrE  
_variables)r\   r  s     rK   embedding_tableszTPUEmbeddingV2.embedding_tables  sL    
 	 #'"?"? 	DOO,>?MM  s   <c                 J   | j                          g }| j                  j                  j                  D ]  }|j	                  |        | j
                  j                         D ci c]'  \  }}||D cg c]  }||j                  |      f c}) }}}}|S c c}w c c}}}w )r  )r  r0  r   r]  extendr  r  r   )r\   ordered_devicesdevicesrs   varr   table_shardss          rK   embedding_table_shardsz%TPUEmbeddingV2.embedding_table_shards  s    
 	 O>>**77 &W%& ..446	  D# 	AP
7=VS))&12
 	
L  
s   'B4BBBc                 .    | j                   j                  S )zReturns how the tables are laid out in the variables.

    The SparseCoreTableLayout describes how a table is stored in its internal
    state. You need this only if you need to pull apart the internal state.
    )rC  r   r   s    rK   embedding_layoutsz TPUEmbeddingV2.embedding_layouts  s     77"""rJ   c                 :    | j                          | j                  S )zGReturns a dict of variables, keyed by `TableConfig`, then by slot name.)r  r  r   s    rK   r$   zTPUEmbeddingV2.variables  s     	??rJ   r  c                 j   
 t        D cg c]  }|j                   c}      d   j                  }|ft        j                  d   j
                  } j                        d fd	}fd

 fdt        j                  t         j                              5   |      }ddd       fd}|Dt        j                  t         j                              5  |j                  |      }	ddd       ni }		d<   |	S c c}w # 1 sw Y   cxY w# 1 sw Y   !xY w)	zBCreate all variables including table variables and slot variables.r   Nc           	         j                   j                  r2d   j                  |j                  d   d   j                  f|      S g }t        j                        D ]  }D ]  }t        j                  |j                        }d|j                  v xs d|j                  v }|rj                  ||j                  |j                  |      }|srD|j                  |j                  d      |   v r#|j                  d      |   |j                     }	nj                  |j                  |      \  }
}|j                  |j                  |j                  f|      |
d |d d f   }	nj|j                  |j                  |j                  f||      }	n?|j                  |j                  | d   z  z  j                  z  |j                  f|      }	|j!                  |	         t#        j$                  |d      S )Nr   rl  rZ  )r~   rz   rZ  axis)rB  r@   rs  r~   r  r   r;  r1   rr  rt  ru  rY  r^  rs   rf  r  r   r   concat)r~   rz   rZ  concat_tensorsr{  r   r}  r~  r  r  rd  r  host_table_tensorsr\   rg  rS  s               rK   table_initialize_fnz=TPUEmbeddingV2._create_variables.<locals>.table_initialize_fn8  sJ    
	+	+	L	La ,,##A&q(9(=(=> - 
 	
 n
 T**+ 5*! $ 1	*E..u/@/@A(hmm+ 5!4!44 
  77  !! M "$jj$6z7H7H7K$LQ$OO-j.?.?.BCAFJJ
 99

M *\
 !,, 00%))<E - +|+Q.0
 **..		:* + h ((**U1X5'(,,- II	  ) H 


)c1	*	5*l n155rJ   c                 j    ~t        j                  ||      }t        j                  | |||      S )Nrl  )rs   initial_valuer~   rz   	trainable)r  partialr   r   )rs   r~   rz   rs  r  r  variable_shapes         rK   getterz0TPUEmbeddingV2._create_variables.<locals>.getter  sA    
''
^5m
 ""% rJ   c                 2    j                  | |d      S )NF)rs   rs  r~   rz   r  r  ) _add_variable_with_custom_getter)rs   rs  r  r\   variable_dtyper  s     rK   variable_creatorz:TPUEmbeddingV2._create_variables.<locals>.variable_creator  s/     22! 3  rJ   c                 "     dz   | z   |      S )N/rI   )rs   rs  r  r  s     rK   slot_creatorz6TPUEmbeddingV2._create_variables.<locals>.slot_creator  s    036={KKrJ   r  rb   )sumr  r  r   float32r	  r  r#   variable_creator_scopemake_sharded_variable_creatorr0  _create_slots)r\   rg  r  r   	table_dimr	  r  r  r  	slot_varsr  r  rS  r  r  r  s   ```       @@@@@@rK   _create_variablesz TPUEmbeddingV2._create_variables&  sH    ~NeE11NOq!%%I&	2N^^Nq!++I 55(..E6 E6N 
	.	.%dnn5
 M $$68KLjM
L 00
'
7 F ++JE	F F
 i(Il O^M MF Fs   D1
D3D)D&)D2c                 z    i }| j                   j                         D ]  \  }}| j                  ||      ||<    |S )zCreate variables for TPU embeddings.

    Returns:
      A dict of dicts. The outer dict is keyed by the table names and the inner
      dicts are keyed by 'parameters' and the slot variable names.
    )r  )rE  r  r  )r\   r$   r  r  s       rK   _create_variables_and_slotsz*TPUEmbeddingV2._create_variables_and_slots  sV     I&*&C&C&I&I&K "F&*&<&<
%7 '= 'i"# rJ   c           	          t        j                         }|j                  j                  | j                  j                                t        j                  dt        |j                               t        j                  d      5  | j                  t        j                  t        j                  |j!                         t"        j$                              t        j&                         d d d        y # 1 sw Y   y xY w)Nz(Saving sparse core layouts for %s tablesz/cpu:0rz   )r   SparseCoreTableLayoutsr  r  r  r   r   rZ   r}   r   r   _track_trackabler*   SparseCoreLayoutsTrackabler   constantSerializeToStringr   string!SPARSECORE_LAYOUTS_CHECKPOINT_KEY)r\   layoutss     rK   _track_restore_info_for_cpuz*TPUEmbeddingV2._track_restore_info_for_cpu  s    $;;=GNN$00779:LL2C4G 
H	 

 
;
;""++-V]]
 !
B
B  s   A%C44C=c                 @    t         j                  j                  |      S rb   )r)   TpuEmbeddingV3CheckpointAdaptercreate_from_checkpoint)r\   paths     rK   _checkpoint_adapterz"TPUEmbeddingV2._checkpoint_adapter  s    .NNee rJ   c                     | j                   s.t        j                         5  | j                          d d d        y y # 1 sw Y   y xY wrb   )_builtr   
init_scopebuildr   s    rK   r  zTPUEmbeddingV2._maybe_build  s=    ;; >> 

   s	   <Ac                 v    | j                   ry| j                         | _        | j                          d| _         y)z8Create variables and slots variables for TPU embeddings.NT)r  r  r  r  r   s    rK   r  zTPUEmbeddingV2.build  s/    {{668DO$$&DKrJ   	gradientspreserved_outputsc                    | j                   st        d      t        j                  | j                  |       | j                  |      }t        t        | j                        }|j                          t        j                  fd}t        |j                               d   j                  }| j                  D ]  }||   }||   }| j                   |   d   }	| j                  |   d   j"                  }
t%        |
t&        j(                        rt+        j,                  |j.                  |j0                  |j2                  |j4                  | ||
j6                        |	j9                         || j:                  |   | j<                  |   |      }|	j?                  |       t%        |
t&        j@                        r| j                   |   d   }t+        jB                  |j.                  |j0                  |j2                  |j4                  | ||
j6                        |	j9                         |j9                         || j:                  |   | j<                  |   |      \  }}|j?                  |       |	j?                  |       t%        |
t&        jD                        rT| j                   |   d   }| j                   |   d   }t+        jF                  d&i d	|j.                  d
|j0                  d|j2                  d|j4                  d|d ||
j6                        d|	j9                         d|j9                         d|j9                         d|d|
jH                  d|
jJ                  d|
jL                  d|
jN                  d|
jP                  d| j:                  |   d| j<                  |   d|\  }}}|j?                  |       |j?                  |       |	j?                  |       9t%        |
t&        jR                        rG| j                   |   d   }| j                   |   d   }t+        jT                  d&i d	|j.                  d
|j0                  d|j2                  d|j4                  d|d ||
j6                        d|	j9                         d|j9                         d|j9                         d|d|
jV                  d|
jX                  d|
jZ                  d|
jP                  d| j:                  |   d| j<                  |   d|\  }}}|j?                  |       |j?                  |       |	j?                  |       t%        |
t&        j\                        rT| j                   |   d   }| j                   |   d   }t+        j^                  d&i d	|j.                  d
|j0                  d|j2                  d|j4                  d|d ||
j6                        d|	j9                         d|j9                         d|j9                         d|d|
j`                  d |
jb                  d!|
jd                  d"|
jf                  d#|
jh                  d| j:                  |   d| j<                  |   d|\  }}}|j?                  |       |j?                  |       |	j?                  |       
tk        d$       |jm                          y%)'ap  Applies the gradient update to the embedding tables.

    If a gradient of `None` is passed in any position of the nested structure,
    then a gradient update with a zero gradient is applied for that feature.
    For optimizers like SGD or Adagrad, this is the same as applying no update
    at all. For lazy Adam and other sparsely applied optimizers with decay,
    ensure you understand the effect of applying a zero gradient.

    Args:
      gradients: A nested structure of gradients, with structure matching the
        `feature_config` passed to this object.
      preserved_outputs: A dicts of PartitionedCsrFormatTensor, coming from the
        second output of the embedding lookup call.

    Raises:
      RuntimeError: if not built.
      ValueError: If a non-`tf.Tensor` non-`None` gradient is passed in, or a
        `tf.Tensor` of the incorrect shape is passed in. Also if
        the size of any sequence in `gradients` does not match corresponding
        sequence in `feature_config`.
      TypeError: If the type of any sequence in `gradients` does not match
        corresponding sequence in `feature_config`.
    zapply_gradients called on unbuilt TPUEmbeddingV2 object. Please either call the embedding lookup method first or manually call the build method.c                 ~    t        |       rt        j                   |        |      } t        j                  | |      S )Nr  )callabler!   castr   convert_to_tensor)paramrz   s     rK   _wrap_paramz3TPUEmbeddingV2.apply_gradients.<locals>._wrap_param  s.    	%egU3""566rJ   r   r  )r   r   r   r   activation_gradientslearning_rateembedding_tabler   max_ids_per_sparse_coremax_unique_ids_per_sparse_corer   accumulators)r   r   r   r   r  r  r  accumulatorr   r  r  r   momentar   r   r   r   r  r  r  r  r   use_nesterovexponentbeta1beta2epsilonr  r  r   
velocitiesvelocityuse_sum_inside_sqrtlinearslinear multiply_linear_by_learning_ratebetalearning_rate_powerl1_regularization_strengthl2_regularization_strengthz+Unsupported optimizer in minibatching mode.NrI   )7r  r4  r0   assert_same_structurer@  _stack_gradientsrM   _PIPELINE_MODE_BACKWARDrP  Enterr   r  r  r   r   rE  r$   r	  r1  r(   SGDxla_ops<xla_sparse_dense_matmul_grad_with_sgd_and_static_buffer_sizer   r   r   r   r  r   rM  rN  r   Adagrad@xla_sparse_dense_matmul_grad_with_adagrad_and_static_buffer_sizeAdagradMomentumIxla_sparse_dense_matmul_grad_with_adagrad_momentum_and_static_buffer_sizer  r  momentumr  r  Adam=xla_sparse_dense_matmul_grad_with_adam_and_static_buffer_sizesum_inside_sqrtbeta_1beta_2FTRL=xla_sparse_dense_matmul_grad_with_ftrl_and_static_buffer_sizer  r  r  r  r  
ValueErrorExit)r\   r  r  contextr  r   r   gradientpartitioned_tensorr   r	  updated_embedding_tabler  updated_accumulatorr  updated_momentar  updated_velocityr  updated_table_tensorupdated_accum_tensorupdated_linear_tensors                         rK   apply_gradientszTPUEmbeddingV2.apply_gradients  sa   8 ;; 
 	t33Y?
 %%i0I(!1!1G MMO!' 7 044E4L4L4N/O	0.. - 33 QH
:&h,Z8nnZ(6e//
;A>HHi	I599	:")"f"f+880BB/@@+88!)%i&=&=>!,,.5]$($J$J% ,0+X+X, "#
" 	,-i!7!?!?@~~j1.ATT/<<"4"F"F!3!D!D/<<%-))*A*AB % 0 0 2(3359a(,(N(N) 04/\/\0 &! 	5!4( 	/0,-i!7!G!GH~~j1.A..,Y7]] /<<"4"F"F "4!D!D 0<<	
 &. *)*A*AB !& 0 0 2 )335  **, :b '33 #++  ((  oo "))  )-(N(N)!& 04/\/\0', &- 	F!4o4 	'/0,-i!7!<!<=..,Y7>>*-l;QQ /<<"4"F"F "4!D!D 0<<	
 &. *)*A*AB !& 0 0 2  **, ",,. :b %.$=$=  &&  && ")) )-(N(N)$ 04/\/\0%* &+ 	C2B2 	()',-i!7!<!<=~~j1.A..,Y7QQ /<<"4"F"F "4!D!D 0<<	
 &. *)*A*AB !& 0 0 2 )335 ))+ :b 2;1[1[ ^^ %.$A$A ,5+O+O ,5+O+O  )-(N(N)!& 04/\/\0', &- 	L	35J4 	,-01)*FGGcQHf LLNrJ   featuresweightsc                 &    | j                  ||      S )z.Call the mid level api to do embedding lookup.)embedding_lookup)r\   r
  r  s      rK   __call__zTPUEmbeddingV2.__call__  s       733rJ   c                 d   t        j                         }d}|Q|j                         }|,t        |t        j
                        rd}n|j                  }|,|rnt        |dd      }|Q|t        j                         k7  r/|r-t        dj                  t        j                         |            |S )z9Raises an error if we are not in the TPUReplicateContext.FNTouter_graphaF  Current graph {} does not match graph which contains TPUReplicateContext {}. This is most likely due to the fact that enqueueing embedding data is called inside control flow or a tf.function inside `strategy.run`. This is not supported because outside compilation fails to extract the enqueue ops as the head of a computation.)
r   get_default_graph_get_control_flow_contextr1  r+   TPUReplicateContextouter_contextgetattrr4  r5  )r\   graph
in_tpu_ctxctxs       rK   /_raise_error_for_incorrect_control_flow_contextz>TPUEmbeddingV2._raise_error_for_incorrect_control_flow_context  s    
 !!#EJ

++-cOc?>>?*
	 O
 
e]D1e 
 %%''J
 "6#"7"7"95A  rJ   num_tpu_chipsnum_sc_per_chipc                 X   t        |      }g }t        j                  |      D ]  }	|j                  |	j                          |D ]  }
|
j
                  ||
_         t        j                  |      }t        |||||      }t        j                  |      }t        j                  |dgt        |      z  ||j                  |j                  |j                  ||j                  ||z  	      }|j                  D ci c]  }|d }}|j                  D ci c]  }|d }}|j                  D ]  }|j                  |   d   j                  }t!        |j                  |   D 
cg c]  }
|
j"                   c}
      }t%        |      D ]  }||   d   |   }||   d   |   }||   d   |   }||   d   }||   d   }t'        j(                  ||||||||||
      \  }}t+        j,                  ||   |      ||<   t+        j,                  ||   |      ||<     ||fS c c}w c c}w c c}
w )	zAComputes the max_ids/unique ids settings from the input features.N)	flat_inputsflat_weightsr   r   r   r   r  stacked_table_to_sample_countnum_sc_shardsr   r   r\     r.  )
row_ids_listcol_ids_list
gains_listsample_count_listcol_offset_listnum_replicatable_vocab_sizefeature_widthr  r   )r   r0   r   r   r   r	  r?  r(  r*  5_preprocess_inputs_and_weights_to_list_of_coo_tensorsr}   r   r   r   r   r  r  r  r   r  .get_stats_from_list_of_sparse_core_coo_tensorsr!   maximum)clsr
  r   r  r  r	  r   copy_feature_configr   r   r   r   rR   r  table_to_list_of_coosr    table_to_max_ids_per_sparse_core'table_to_max_unique_ids_per_sparse_corer)  rS  r{  r"  r#  r$  r%  r&  r  r  s                               rK   compute_sparse_core_statsz(TPUEmbeddingV2.compute_sparse_core_stats  s    0?L<< 34 )'--()  $		 #$ 99:MNM7$	A ,,x(K 	LL##k"22'$%$=$=*+*I*I%&%?%?+*+*A*A)M9 	M 
	
  )*(A(A($
A($ ( )*(A(A/$
A/+ / // %

//
;A>BBm00< 

 	 _% 
!,Z8;A>,Z8;A>*:6q9!<
1*=a@/
;A>
 BB%%!/+%-'+!
	
#* 8@7G7G,Z8#8
(4 ?G>N>N3J?*?
/
;7
%
N 	)/ [(/s   ;
H
H""H'
r   c                 T   t        j                  | j                  |       t        j                  |      }dgt	        |      z  }|5t        j                  | j                  |       t        j                  |      }| j                         }|rt        j                  | j                  | j                  j                  | j                  | j                  | j                  | j                  | j                  | j                   | j"                  | j$                  | j&                  ||      S || j                  j(                  j*                  }t-        j.                  t1        j2                  |d   d               5  | j                  | j                  j                  | j                  | j                  | j                  | j                  | j                  | j                   | j"                  | j$                  | j&                  ||      cddd       S t4        j6                  j9                  |      }|j:                  dk7  rt=        dj?                  |            t-        j.                  t1        j2                  |            5  | j                  | j                  j                  | j                  | j                  | j                  | j                  | j                  | j                   | j"                  | j$                  | j&                  ||      cddd       S # 1 sw Y   yxY w# 1 sw Y   yxY w)z#Preprocessing the features on host.N)r=  r;   rL  r  r   r   r   r   r   r   r  r  r   TPUz$Non-TPU device {} passed to enqueue.) r0   r  r@  r   r}   r  r+   outside_compilation_preprocess_featuresr0  r=  r;   rL  r;  r>  rE  rH  rI  rJ  rA  r   r]  r   r   r   get_host_for_devicerm  rn  ro  rj  r  r5  )	r\   r
  r  r   r  r  in_tpu_contextr_  device_specs	            rK   enqueuezTPUEmbeddingV2.enqueue8  s    	t33X>,,x(K6C,,L
  !5!5w?\\'*lIIKN00

#
##~~BB&*&F&F!%!<!<//++"&"?"?(,(K(K $ ; ;#'#A#A++!#  
 NN++88k::k55k!nQ6GHI 
((!%!D!D(,(H(H#'#>#> 11--$($A$A*.*M*M"&"="=%)%C%C--#% ) 

 
  ((44V<k		 	 E	)?FFvNOO::k55f=> 
((!%!D!D(,(H(H#'#>#> 11--$($A$A*.*M*M"&"="=%)%C%C--#% ) 

 
)
 
(
 
s   8BL=BLLL'partitioned_tensorsc           	      j   i }|D ]  }||   d   }||   d   }||   d   }t        j                  |j                  |j                  |j                  |j
                  g||||g      \  }}}	}
t        j                  |||	|
g      \  }}}	}
t        |||	|
|j                  |j                        ||<    |S )zCopy tensors to device.r   r   r\  r   )
r  tpu_copy_with_dynamic_shaper   r   r   r   'tpu_annotate_tensors_with_dynamic_shaper   r   r   )r\   r;  partitioned_device_tensorsr   r  row_pointers_unpadded_sizeids_unpadded_sizer   r   r   r   s              rK   _copy_tensors_to_devicez&TPUEmbeddingV2._copy_tensors_to_device  s   
 "$) )
.z:1=#6z#B1#E -j9!< 
-
-$11$66$55$11	 -###	 Fl%'7& 
9
9	;  Fl%'7 0J#-+#)66 II	0 ,A)T &%rJ   c                 0   t        t        | j                        }|j                          t	        j
                  | j                  |      }i }t        |j                               d   j                  }| j                  D ]  }||   }| j                  |   d   }| j                  |   }t        |t              st        dt!        |       d      t#        j$                  |j&                  |j(                  |j*                  |j,                  | j.                  |   |||r|j0                  nd|r|j2                  nd|r|j4                  nd| j6                  |   | j8                  |   |      }	|	||<    |j;                          | j=                  |      }||fS )zPerform embedding lookup.)r;  r   r  z*Expect PartitionedCsrFormatTensor but get .)r   r   r   r   
input_sizer  r   quantization_config_lowquantization_config_highquantization_config_num_bucketsr  r  r   )rM   _PIPELINE_MODE_FORWARDrP  r  r+   r5  rB  r  r   r   rE  r$   rK  r1  r   r  typer  /xla_sparse_dense_matmul_with_static_buffer_sizer   r   r   r   rI  loweruppernum_bucketsrM  rN  r  _unstack_activations)
r\   r;  r  activationsr   r   r  r   r  
activations
             rK   dequeuezTPUEmbeddingV2.dequeue  s    ) 0 0G MMO)==$$/
 K 04""$00		1	1 - 33 $+
.z:nnZ(6e 66zB*,FG'(),
 	
 JJ)66.@@->>)6600<3[+>!''A ,?!''A 2E!--!"&"H"H# *.)V)V*  /j4 !+k*I$+L LLN ++K8K,--rJ   c                     | j                   s| j                          t        t        | j                        }|j                          | j                  ||      }|j                          | j                  |      }|S )a  Perform embedding lookup on the input feature.

    Args:
      features: A nested structure of `tf.Tensor`s, `tf.SparseTensor`s or
        `tf.RaggedTensor`s, with the same structure as `feature_config`. Inputs
        will be downcast to `tf.int32`. Only one type out of `tf.SparseTensor`
        or `tf.RaggedTensor` is supported per call.
      weights: If not `None`, a nested structure of `tf.Tensor`s,
        `tf.SparseTensor`s or `tf.RaggedTensor`s, matching the above, except
        that the tensors should be of float type (and they will be downcast to
        `tf.float32`). For `tf.SparseTensor`s we assume the `indices` are the
        same for the parallel entries from `features` and similarly for
        `tf.RaggedTensor`s we assume the row_splits are the same.

    Raises:
      ValueError: If the input feature is not one of the Tensor, SparseTensor or
        RaggedTensor type.
      TypeError: If the type of any sequence in `features` does not match
        corresponding sequence in `feature_config`. Similarly for `weights`, if
        not `None`.

    Returns:
      packed_activations: Embedding lookup results packed as the same sequence
        of the input feature.
      packed_output: A dict of PartitionedCsrFormatTensors.
    )	r  r  rM   rI  rP  r  r:  r  rR  )r\   r
  r  r  r;  results         rK   r  zTPUEmbeddingV2.embedding_lookup  se    : ;;
( 0 0G MMO,,x9LLN\\-.FMrJ   r=  r;   rL  r   r   r   r   r   r   r  r  c                     t         j                  |||
|||	|||	      }| j                  ||||      }| j                  |||||||      }|S )z Function to preprocess features.)r*  r*  _sort_list_of_coo_tensors+_get_csr_wrapped_coo_from_sorted_coo_tensor)r\   r=  r;   rL  r  r   r   r   r   r   r   r  r  r/  table_to_sorted_coo_tensortable_to_csr_format_tensors                   rK   r6  z#TPUEmbeddingV2._preprocess_features(  s    $ 	LL#)$!
	
  "&!?!?	" 	88 '"&#!	
  &%rJ   input_featureweight
row_offset
col_offset	col_shift
vocab_sizestacked_table_sample_countc                 &   t        j                  t        j                  |j                        }t        |t        j                        r*t        j                  |dg      }|&t        j                  |t        j                        }nIt        |t        j                        rt        j                  |dg      }nt        dt        |             t        j                   t        j"                  dt        j$                        t'        j(                  |t        j$                        t'        j(                  |t        j                        ||j*                  j,                  |||||	|
      \  }}}nt        |t.        j0                        r&|0t        j                  |j2                  t        j                        }n>t        |t.        j0                        r|j2                  }nt        dt        |             t        j                   t'        j(                  |j4                  t        j$                        t'        j(                  |j2                  t        j$                        t'        j(                  |t        j                        ||j*                  j,                  |||||	|
      \  }}}nXt        |t6        j8                        r%|s0t        j                  |j2                  t        j                        }n>t        |t6        j8                        r|j2                  }nt        dt        |             t        j                   t'        j(                  |j:                  t        j$                        t'        j(                  |j2                  t        j$                        t'        j(                  |t        j                        ||j*                  j,                  |||||	|
      \  }}}nt        dt        |       d	      ||||fS )
z8Convert any of the expected input types to a COO format.r  z(Expect weight to be Tensor type but got )r   )indices_or_row_splitsr   r  r   combinerr  r\  r]  r^  r   r`  z.Expect weight to be SparseTensor type but got z.Expect weight to be RaggedTensor type but got zInput of unknown type zU. Please only pass Tensor, SparseTensor or RaggedTensor as input to embedding lookup.)r  r  r  r  r  r1  r   r   r   reshape	ones_liker   r  r  rJ  r  *convert_to_list_of_sparse_core_coo_tensorszerosint32r!   r  r   rd  r   SparseTensorr   indicesr%   RaggedTensor
row_splits)r-  rZ  r[  r   r\  r]  r^  r_  r  r   r`  r   r"  r#  r$  s                  rK   -_convert_input_feature_to_list_of_coo_tensorsz<TPUEmbeddingV2._convert_input_feature_to_list_of_coo_tensors]  s   " ##HLL.2M2MNL-/''t<m	$$]&..Iffmm,""6B406tF|nE
 	
 
<
<$-OOD$M]]=EmmFFNN;'%++44-##!))C -lL* 
M=#=#=	>	$$]%9%9Pfm889<T&\NK
 	
 
<
<$,MM''v||% ]]=#7#7v||LmmFFNN;'%++44-##!))C -lL*" 
M=#=#=	>$$]%9%9Pfm889<T&\NK
 	
 
<
<$,MM**&,,% ]]=#7#7v||LmmFFNN;'%++44-##!))C -lL*$ "4#6"7 8  
 z<??rJ   r  c
                 t   |D 
ci c]  }
|
g g g g gg g g g gg g g g gg g f }}
t        |||      D ]   \  }}\  }}||j                  j                     \  }
}}||
   }||   }t        j	                  |||||||j                  j
                  ||	|
      \  }}}}t        |      D ]Y  }||
   d   |   j                  ||          ||
   d   |   j                  ||          ||
   d   |   j                  ||          [ ||
   d   j                  ||z         ||
   d   j                  |        |S c c}
w )z0Convert the raw inputs into list of coo tensors.r   r   r\  r!  r.  )zipr   rs   r*  rn  r  r   r   )r-  r  r  r   r   r   r   r  r  r   r   r/  inpr[  r'  r   r]  r^  r`  r\  r"  r#  r$  r   r{  s                            rK   r*  zDTPUEmbeddingV2._preprocess_inputs_and_weights_to_list_of_coo_tensors  s   , 2	  	RRR
 	
	 	 14\=1 >,V,lG +H
--

+'j*i $A#L +L9j 
F
Fmm++( ;lL*l _% F!j)!,Q/66|AGj)!,Q/66|AGj)!,Q/66z!}EF J'*11
/
) J'*11*==>> ! S	s   D5r/  c                 ~   |D ci c]	  }|g g g g f }}|D ]  }||   d   j                   }t        ||   D cg c]  }|j                   c}      }	t        |      D ]  }
||   d   |
   }||   d   |
   }||   d   |
   }||   d   }||   d   }t	        j
                  |||||||	||| j                  |   | j                  |   |      \  }}}}||   d   j                  |       ||   d   j                  |       ||   d   j                  |       ||   d   j                  |         |S c c}w c c}w )z Sort the coo tensors by replica.r   r   r\  r!  r.  )r"  r#  r$  r%  r&  r'  r(  r)  r  r  r  r   )	r  r  r  r   r  $sort_list_of_sparse_core_coo_tensorsrM  rN  r   )r\   r=  r/  r   r  r   rX  r)  r   rS  r{  r"  r#  r$  r%  r&  sorted_row_idssorted_col_idsr   	id_countss                       rK   rV  z(TPUEmbeddingV2._sort_list_of_coo_tensors  s    8O")3
RR$$" " . )D
-j9!<@@m-DZ-P$)%

 	 _% "D!,Z8;A>,Z8;A>*:6q9!<
1*=a@/
;A> 88%%!/+,-'+$($J$J% ,0+X+X, "!
	
( 	#:.q188H":.q188H":.q188F":.q188CE"D)DV &%]"s
   D5D:
rX  c                    i }|D ]  }	||	   \  }
}}}||	   d   j                   }t        ||	   D cg c]  }|j                   c}      }t        j                  |
|||t        j                  dt        j                        ||	   |z  ||||||	| j                  j                        \  }}}}}}}t        ||||||	   |      ||f||	<    |S c c}w )z6Get csr wrapped coo tensor from the sorted coo tensor.r   r  )sorted_row_ids_listsorted_col_ids_listsorted_gains_listid_counts_listsplitssample_count_per_scr'  rL  r;   r(  r)  r   r>   r   )r  r  r  r  -convert_to_sparse_core_csr_wrapped_coo_tensorr   r  r   int64rB  r>   r   )r\   r=  r;   rL  rX  r   r   r  rY  r   rx  ry  rz  r{  r)  r   rS  r   r   r   r   r@  rA  r   s                           rK   rW  z:TPUEmbeddingV2._get_csr_wrapped_coo_from_sorted_coo_tensor5  s:    "$- 3
 %Z
0



 .j9!<@@m-DZ-P$)%

 	 
?
?11-'%%v|| 4J?*!7&A+% >>PP!




$

2( %' 1/'0<7_ %
0 ,Q3h &%Ss   C
rP  c           	         g }| j                   D ci c]  }|d }}| j                   D ]=  }||   j                  }t        j                  ||   | j                  d|d   g      ||<   ? | j
                  D ]=  \  }}t        j                  t        j                  |j                        }| j                  |j                  j                     d   }| j                  |j                  j                     }	t        j                  ||   d||   dg| j                  || j                  z  |j                  j                   |	z
  g      }
t        j                  |
t#        |j                        |j                  j                   |	z
  gz         }
|j%                  |
       ||xx   || j                  z  z  cc<   @ t'        j(                  | j*                  |      S c c}w )z;Untack the incoming per table activations into per feature.r   rb  )rE  r~   r   re  r;  rA  r  r  r  r  r  rH  r   rs   rF  slicer  r  r   r0   r   r@  )r\   rP  flattened_activationsr   table_to_current_offsetactivation_shaper!  r   r   
extra_colsrQ  s              rK   rO  z#TPUEmbeddingV2._unstack_activationsw  s   
 (,(E(E$
A  33 
$Z066 ) 1 1
j
!  "&6r&:
;!k* )) 
7%%hllG4H4HIl66w}}7I7IJ1Mj11'--2D2DEj??
j
!%j11
5##d333mm*,j $$

w##
$(9(9J(F'G
Gj "":.j)
$//
/)'.   !5!57LMMAs   
Gc           
      P   | j                   D ci c]	  }|g g g g g }}t        j                  |      }t        || j                        D ]  \  }\  }}t        j                  t        j                  |j                        }|5t        |t        j                        st        dt        |       d| d      |Qt        j                   d|       t#        j$                  ||j&                  j(                  ft*        j,                        }| j.                  |j&                  j0                     d   }| j2                  |j&                  j0                     }	t#        j4                  |d|j&                  j(                  |	z
  g      }|	dk7  rCt#        j6                  |ddgd|	gg      }|j9                  ||j&                  j(                  g       || j:                  z  }
t=        | j:                        D ]I  }||   |   j?                  t#        j@                  |||
z  dg|
|j&                  j(                  g             K  |D ]X  }t#        jB                  t=        | j:                        D cg c]  }t#        jB                  ||   |   d      ! c}d      ||<   Z |S c c}w c c}w )	z4Stack the incoming gradients to per table gradients.zfound non-tensor type: z	 at path rD  z|No gradient passed for feature %s, sending zero gradient. This may not be correct behavior for certain optimizers like Adam.r  r   rb  r  )"rE  r0   r   rp  rA  r  r  r  r  r  r1  r   r   r  rJ  r   r<  r   rh  r   r  r   r  rH  rs   rF  re  pad	set_shaper;  r   r   r  r  )r\   r  r   table_to_gradient_listflattend_gradientsr   r  r   r   r  per_sc_sample_countr{  s               rK   r  zTPUEmbeddingV2._stack_gradients  s    77 	RR$$  i0%(D//& *
!/4 %%hllG4H4HIl		j6==&I%d8n%5YtfAF
 	
 
	 	( 	
 ??7==,,-V^^
 66w}}7I7IJ1Mj11'--2D2DEj""
R**Z78h 
q==QFQ
O+DEL'--*;*;<=(D,A,AAT**+ 

!z*1-44OO((!,'MM%%		


A*
V - 
+4+;+; T223 5jA!D1M ,Z( "!qds   J*$J#
)NFN)r   Nrb   )r.  NN)NN)FrA   rB   rC   rD   r  r  r   r(   FeatureConfigr   r   
_OptimizerrE   r8   rT   r   tuplerG   r   r-   rV  rY  rH   rf  r   DTyper   r   r   r  rO  r   r   r   r  r  r   r   r  r$   r  r  r  r  r  r  r   r   r	  r
   r  r  classmethodr2  r:  rB  rR  r  r6  r   rj  r%   rl  rn  r*  rV  rW  rO  r  rk   rl   s   @rK   r*  r*    s   S!%("
 @D27JNq;2@@(JKq; 0;;<q; ,0	q;
 %--F$Gq;fLL S#XL S		L
 L L ~~L0 .. S#X	"F;'F F S#X	F
 \\F Cd3-.//0FP(
T 	"..0E0EEF	 	 "..\5J5J0KKL ( #C'===># # (($sL4I4I/I*JJ E1==>E E C&&&'	ENCc<000112  LL c#==>L^ 5944$,SM4S$s6677848  ?CJN^^ M834^ 	^
 ^ 0;;<^ %--F$G^ S#X^ ^F  $"	I
I
 }I
 sm	I

 I
V0&S>0& 0&dD. 
s..
/c
9D.
 S$s66778D.N 59++$,SM+S$s66778+t %)3&3& $'3& "	3&
 3& 3&  $CH~3& &*#uS#s]/C*C%D3& "#s(^3& !%S#X3& 3& 3& SM3& 3&j d@
--33]5O5O
Od@
 v}}%d@ -::d@ d@ d@ d@ d@ d@ d@ #&d@ d@ d@L 6!6! 6! 	6!
  $CH~6! &*#uS#s]/C*C%D6! !%S#X6! 6! &*#s(^6! 6! CH~6! 6!p6&6& "#s(^6&  $CH~	6&
 6& T#s(^T&--0016&p@&@& $'@& "	@&
 #'sCx.@&  $CH~@& "#s(^@& @& @&D&Nd33E.F &NP="rJ   r*  kwargs.c                 T   dt         dt        f   dt        dz  fd}t        | d   t        j
                        rd| d   j                  v s| d   j                  rd| d   j                  v r| d   j                  d   }n| d   j                  d   }| d   || d   j                  j                  d	| d	         | d   j                   || d   j                        fS d| vs| d   t        | d         st        d
j                  | d               | d   | d   | d	   | d    || d         fS )a  Extracts the variable creation attributes from the kwargs.

  Args:
    kwargs: a dict of keyword arguments that were passed to a variable creator
      scope.

  Returns:
    A tuple of variable name, shape, dtype, initialization function,
    restore_uid.
  r  .r   Nc                     t        | dd       S )Nrestore_uid)r  r  s    rK   get_restore_uidz.extract_variable_info.<locals>.get_restore_uid  s    =-66rJ   r~   r   rs   rz   a2  Unable to extract initializer function and shape from {}. Please either pass a function that expects a shape and dtype as the initial value for your variable or functools.partial object with the shape and dtype kwargs set. This is needed so that we can initialize the shards of the ShardedVariable locally.)r   r   rG   r1  r  r  keywordsrt  getfuncr  r  r5  )r  r  r~   s      rK   extract_variable_infor    sn    7Xc3h%7 7C$J 7 '):):;(111		 	%	% &)222_%..w7e_%**1-ev((,,WfWoF$$/445  V		 f_-.
	@ AG?#A
  	vww/0 rJ   r  c                     t        | t        j                        xs\ t        | t        j                        xs@ t        | t        j
                        xr$ t        | j                  t        j                        S )z-Whether the initial value is from checkpoint.)r1  r-   CheckpointInitialValueCheckpointInitialValueCallabler  r  r  r  s    rK   is_checkpoint_initial_valuer    sc      ; ;< 	M4#F#F	G ]I$5$5
6   $"E"E	rJ   strategyc                 B      j                   j                   fd}|S )zCreate a variable creator which shards across all the tpu device.

  Args:
    strategy: a TPUStrategy object.

  Returns:
    The sharded variable creator.
  c           
         d|d<   d}j                   \  }}t        |d         }t        j                  |d         }|r'd|j                  vrd|j
                  vrt        d      t        |      \  }}	}
}}t        j                  j                  |	      }	||z  }|	|   |z  dk7  rt        dj                  |	|            |	j                         }||   |z  ||<   t        j                  |      }d|j                  v }g }dgt        |	      z  }t        |      D ]  }t        |      D ]  }t        j                   |   |         5  | d| |d	<   ||d
<   |rjt#        j$                  t        j&                  |      t)        j*                  |            }t-        j.                  |d   |      |d<   ||xx   ||   z  cc<   nt-        j.                  |||
      |d<   |j1                   | |i |       ddd         t3        |t4        j6                  j8                  d      }||j;                          ||_        |S # 1 sw Y   #xY w)z%Create a TPUEmbeddingShardedVariable.Tskip_mirrored_creatorr   r  rZ  zoWhen a sharded variable is initialized from a checkpoint, shard_info must be in arguments of the init function.zWOnly evenly sharding across devices is currently supported. Got shape {} and {} devicesr  rs   r~   )rZ  rl  N)r~   r  r1   rr  rt  ru  r  r  r   r   r   r5  r   
getargspecr}   r   r   r-   rV  as_shaper   deepcopyr  r  r   r   r   VariableAggregationNONE_maybe_initialize_trackable_update_uid)next_creatorrt  r  rr   r`  ra  is_ckpt_init_valuer}  rs   r~   rz   unwrapped_initial_valuer  rb  rQ  unwrapped_arg_specr~  r$   rR  
replica_idlogic_core_idrZ  rT  r  r_  s                          rK   _create_sharded_variablez?make_sharded_variable_creator.<locals>._create_sharded_variable7  s    '+F"# I*5*;*;'L'4VO5LM(()@AH- 3 33B  	f% =D%/ ((/E!66KY+%*((.uk(B 
 mmoO!0!;{!JOI#../FG!%7%<%<<NIsSZ'L) :
 !67 :-ZZJ/>? 	:"V1ZL1&.+&/%%o6./J '0&7&7'J'F?# Y'?9+EE'&/&7&7'e'F?# 

<88
9#	: 	:::* ))\==BBDF ((*&fM3	: 	:s   B.II)r   r]  )r  r  r_  s   ` @rK   r  r  *  s%     !!..+HT 
"!rJ   rb   )irD   r  r   r   r  r  r  typingr   r   r   r   r   r   r	   r
   r   abslr   tensorflow.core.frameworkr   tensorflow.core.tpu.kernelsr   tensorflow.python.checkpointr   tensorflow.python.distributer   r   r   r   r   r   r   tensorflow.python.frameworkr   r   rm  r   r   r   r   r   tensorflow.python.opsr   r   r    r!   r"   r#   r$   r   tensorflow.python.ops.raggedr%   tensorflow.python.tpur&   r'   r(   r)   r*   r+   tensorflow.python.tpu.opsr,   r  tensorflow.python.trackabler-   !tensorflow.python.training.savingr.   tensorflow.python.utilr/   r0   r1    tensorflow.python.util.tf_exportr2   rg   rI  r  r   r  r   	dataclassr8   ControlFlowContextrM   SaveableObjectrn   r   r   legacy_saveable_nameTPUVariableMixinDistributedVariabler   
namedtupler   r   rG   r(  TPUEmbeddingBaser*  rH   r  r  rE   r  Strategyr  rI   rJ   rK   <module>r     s   F       X X X  4 > 8 4 7 5 1 3 / 4 3 ; . + 5 . 4 + 2 ; * 0 0 ; 6 < 4 8 E 8 1 < , = ) ' - 6 . " $  %00&22+>>  ABd#	1 	1 $ C	1$!1!D!D $@./"@"@ .b1 : : :, &%%b)k!;!;k *k^ 4[33 
 =& IME;'EE#}"456E E 	E
 #++D"EE ER 67@"'88 @" 8@"J088
sCx&,,S(98C=H8vs t W"%%W"c3hW"rJ   