
    AVhޓ                        d Z ddlZddlZddlmZ ddlZddlZddlm	Z	 ddlm
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'  e'dg        G d de(             Z) e'dg        G d de)             Z* e'd g        G d! d"e)             Z+ e'd#g        G d$ d%e)             Z, G d& d'ejZ                        Z. G d( d)e!j^                        Z0 e'd*g        G d+ d,e0e	jb                               Z2d1d-Z3 ejh                  e2e3       e2jk                           e%jl                  ejn                  e2      	 	 	 	 d2d.       Z7 e%jp                  ejr                        	 	 	 	 	 	 	 d3d/e2fd0       Z9y)4zShardedVariable class.    N)Sequence)composite_tensor)constant_op)dtypes)indexed_slices)ops)tensor)tensor_conversion_registry)tensor_shape)	type_spec)	array_ops)data_flow_ops)embedding_ops)math_ops)partitioned_variables)resource_variable_ops)	variables)save_context)base)saveable_object_util)dispatch)	tf_exportz0distribute.experimental.partitioners.Partitioner)v1c                       e Zd ZdZddZy)Partitionerak  Partitioner base class: all partitiners inherit from this class.

  Partitioners should implement a `__call__` method with the following
  signature:

  ```python
  def __call__(self, shape, dtype, axis=0):
    # Partitions the given `shape` and returns the partition results.
    # See docstring of `__call__` method for the format of partition results.
  ```
  c                     t         )a  Partitions the given `shape` and returns the partition results.

    Examples of a partitioner that allocates a fixed number of shards:

    ```python
    partitioner = FixedShardsPartitioner(num_shards=2)
    partitions = partitioner(tf.TensorShape([10, 3], tf.float32), axis=0)
    print(partitions) # [2, 0]
    ```

    Args:
      shape: a `tf.TensorShape`, the shape to partition.
      dtype: a `tf.dtypes.Dtype` indicating the type of the partition value.
      axis: The axis to partition along.  Default: outermost axis.

    Returns:
      A list of integers representing the number of partitions on each axis,
      where i-th value corresponds to i-th axis.
    )NotImplementedErrorselfshapedtypeaxiss       ]/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/distribute/sharded_variable.py__call__zPartitioner.__call__<   s
    (     Nr   )__name__
__module____qualname____doc__r$    r%   r#   r   r   .   s    
r%   r   z;distribute.experimental.partitioners.FixedShardsPartitionerc                       e Zd ZdZd ZddZy)FixedShardsPartitionera  Partitioner that allocates a fixed number of shards.

  Examples:

  >>> # standalone usage:
  >>> partitioner = FixedShardsPartitioner(num_shards=2)
  >>> partitions = partitioner(tf.TensorShape([10, 3]), tf.float32)
  >>> [2, 1]
  >>>
  >>> # use in ParameterServerStrategy
  >>> # strategy = tf.distribute.experimental.ParameterServerStrategy(
  >>> #   cluster_resolver=cluster_resolver, variable_partitioner=partitioner)
  c                     || _         y)zoCreates a new `FixedShardsPartitioner`.

    Args:
      num_shards: `int`, number of shards to partition.
    N)_num_shards)r   
num_shardss     r#   __init__zFixedShardsPartitioner.__init__c   s     "Dr%   c                     ~dgt        |      z  }t        | j                  |j                  |   j                        ||<   |S )N   )lenminr/   dimsvalue)r   r    r!   r"   results        r#   r$   zFixedShardsPartitioner.__call__k   s?    S3u:Ft''D)9)?)?@F4LMr%   Nr&   r'   r(   r)   r*   r1   r$   r+   r%   r#   r-   r-   S   s    "r%   r-   z7distribute.experimental.partitioners.MinSizePartitionerc                   "    e Zd ZdZ	 ddZddZy)MinSizePartitionera1  Partitioner that allocates a minimum size per shard.

  This partitioner ensures each shard has at least `min_shard_bytes`, and tries
  to allocate as many shards as possible, i.e., keeping shard size as small as
  possible. The maximum number of such shards (upper bound) is given by
  `max_shards`.

  Examples:

  >>> partitioner = MinSizePartitioner(min_shard_bytes=4, max_shards=2)
  >>> partitions = partitioner(tf.TensorShape([6, 1]), tf.float32)
  >>> [2, 1]
  >>> partitioner = MinSizePartitioner(min_shard_bytes=4, max_shards=10)
  >>> partitions = partitioner(tf.TensorShape([6, 1]), tf.float32)
  >>> [6, 1]
  >>>
  >>> # use in ParameterServerStrategy
  >>> # strategy = tf.distribute.experimental.ParameterServerStrategy(
  >>> #   cluster_resolver=cluster_resolver, variable_partitioner=partitioner)
  c                     |dk  rt        d|       |dk  rt        d|       |dk  rt        d|       || _        || _        || _        y)a@  Creates a new `MinSizePartitioner`.

    Args:
      min_shard_bytes: Minimum bytes of each shard. Defaults to 256K.
      max_shards: Upper bound on the number of shards. Defaults to 1.
      bytes_per_string: If the partition value is of type string, this provides
        an estimate of how large each string is.
    r3   z7Argument `min_shard_bytes` must be positive. Received: z2Argument `max_shards` must be positive. Received: 8Argument `bytes_per_string` must be positive. Received: N)
ValueError_min_shard_bytes_max_shards_bytes_per_string)r   min_shard_bytes
max_shardsbytes_per_strings       r#   r1   zMinSizePartitioner.__init__   s     &')  A~>zl
K  !'(*  ,D!D-Dr%   c                 ~     t        j                  | j                  || j                  | j                        ||      S )N)max_partitionsr"   min_slice_sizebytes_per_string_element)r   min_max_variable_partitionerr@   r?   rA   r   s       r#   r$   zMinSizePartitioner.__call__   sD     =='',,!%!7!7	
 U r%   N)i   r3      r&   r9   r+   r%   r#   r;   r;   r   s    , GI.:r%   r;   z7distribute.experimental.partitioners.MaxSizePartitionerc                        e Zd ZdZddZddZy)MaxSizePartitionera,  Partitioner that keeps shards below `max_shard_bytes`.

  This partitioner ensures each shard has at most `max_shard_bytes`, and tries
  to allocate as few shards as possible, i.e., keeping shard size as large
  as possible.

  If the partitioner hits the `max_shards` limit, then each shard may end up
  larger than `max_shard_bytes`. By default `max_shards` equals `None` and no
  limit on the number of shards is enforced.

  Examples:

  >>> partitioner = MaxSizePartitioner(max_shard_bytes=4)
  >>> partitions = partitioner(tf.TensorShape([6, 1]), tf.float32)
  >>> [6, 1]
  >>> partitioner = MaxSizePartitioner(max_shard_bytes=4, max_shards=2)
  >>> partitions = partitioner(tf.TensorShape([6, 1]), tf.float32)
  >>> [2, 1]
  >>> partitioner = MaxSizePartitioner(max_shard_bytes=1024)
  >>> partitions = partitioner(tf.TensorShape([6, 1]), tf.float32)
  >>> [1, 1]
  >>>
  >>> # use in ParameterServerStrategy
  >>> # strategy = tf.distribute.experimental.ParameterServerStrategy(
  >>> #   cluster_resolver=cluster_resolver, variable_partitioner=partitioner)
  Nc                     |dk  rt        d|       |r|dk  rt        d|       |dk  rt        d|       || _        || _        || _        y)ap  Creates a new `MaxSizePartitioner`.

    Args:
      max_shard_bytes: The maximum size any given shard is allowed to be.
      max_shards: The maximum number of shards in `int` created taking
        precedence over `max_shard_bytes`.
      bytes_per_string: If the partition value is of type string, this provides
        an estimate of how large each string is.
    r3   z6Argument `max_shard_bytes` must be positive. Received z1Argument `max_shards` must be positive. Received r=   N)r>   _max_shard_bytesr@   rA   )r   max_shard_bytesrC   rD   s       r#   r1   zMaxSizePartitioner.__init__   s     %&(  j1n=j\
J  !'(* 
 ,D!D-Dr%   c                 ~     t        j                  | j                  | j                  | j                  |      ||      S )N)rO   rC   rH   r"   )r   variable_axis_size_partitionerrN   r@   rA   r   s       r#   r$   zMaxSizePartitioner.__call__   sD     ??--##!%!7!7	
 U r%   )NrJ   r&   r9   r+   r%   r#   rL   rL      s    6.:r%   rL   c                   V    e Zd ZdZdgZ ed       Zd Zd Zed        Z	d Z
d Zd	 Zy
)ShardedVariableSpecz+Type specification for a `ShardedVariable`._variable_specsc                     t         S NShardedVariabler   s    r#   <lambda>zShardedVariableSpec.<lambda>   s    _ r%   c                 $    t        |      | _        y rV   )tuplerT   )r   variable_specss     r#   r1   zShardedVariableSpec.__init__   s     0Dr%   c                     | j                   S rV   rT   rY   s    r#   
_serializezShardedVariableSpec._serialize   s    r%   c                     | j                   S rV   r_   rY   s    r#   _component_specsz$ShardedVariableSpec._component_specs   s    r%   c                 ,    t        |j                        S rV   )r\   r   )r   r7   s     r#   _to_componentsz"ShardedVariableSpec._to_components  s    !!r%   c                     t        |      S rV   rW   )r   r   s     r#   _from_componentsz$ShardedVariableSpec._from_components  s    9%%r%   c                     |S rV   r+   )r   r7   _s      r#   _castzShardedVariableSpec._cast	  s    Lr%   N)r'   r(   r)   r*   	__slots__property
value_typer1   r`   rb   rd   rf   ri   r+   r%   r#   rS   rS      sE    3 !)45*1     "&r%   rS   c                   J    e Zd ZdZd! fd	Zd Zd Zd Zed        Z	ed        Z
ed        Zed	        Zed
        Zd"dZd#dZd#dZd Zd Zd$dZd$dZd$dZd$dZd$dZd$dZd$dZd$dZd%dZd Zd Zd Zed        Zed        Z ed        Z!ed        Z"d  Z# xZ$S )&ShardedVariableMixinzMixin for ShardedVariable.c                 X   t         t        |           || _        || _        t        |t              r|rt        d |D              rt        d|       |D ch c]  }|j                   }}t        |      dkD  r&t        d|D cg c]  }|j                   c}       |d   }|j                  | _        |D ch c](  }t        |j                  j                         dd       * }}t        |      dkD  r&t        d|D cg c]  }|j                   c}       t!        d |D              }t#        j$                  |g|j                  j                         dd z         | _        |D ]  }t)        j*                  |       |_         t/        t        |            D cg c].  }t/        t        |j                              D cg c]  }d c}0 c}| _        t/        dt        |            D ]S  }	| j0                  |	   dxx   | j0                  |	dz
     d   ||	dz
     j                  j                         d   z   z  cc<   U |D cg c]  }|j3                          }
}t        d	 |
D              rt        d
|
       t5        j6                  | j&                  | j                  | j                  | j                  d   j8                  t:        j<                  j>                  t:        j@                  j>                        | _!        yc c}w c c}w c c}w c c}w c c}w c c}w c c}w )a  Treats `variables` as shards of a larger Variable.

    Example:

    ```
    variables = [
      tf.Variable(..., shape=(10, 100), dtype=tf.float32),
      tf.Variable(..., shape=(15, 100), dtype=tf.float32),
      tf.Variable(..., shape=(5, 100), dtype=tf.float32)
    ]
    sharded_variable = ShardedVariableMixin(variables)
    assert sharded_variable.shape.as_list() == [30, 100]
    ```

    Args:
      variables: A list of `ResourceVariable`s that comprise this sharded
        variable. Variables should not be shared between different
        `ShardedVariableMixin` objects.
      name: String. Name of this container. Defaults to "ShardedVariable".
    c              3   R   K   | ]  }t        |t        j                          ! y wrV   )
isinstancevariables_libVariable.0vs     r#   	<genexpr>z0ShardedVariableMixin.__init__.<locals>.<genexpr>/  s      LQ:a!7!788Ls   %'zSArgument `variables` should be a non-empty list of `variables.Variable`s. Received r3   zPAll elements in argument `variables` must have the same dtype. Received dtypes: r   NzkAll elements in argument `variables` must have the same shapes except for the first axis. Received shapes: c              3   h   K   | ]*  }t        |j                  j                         d           , yw)r   N)intr    as_listrt   s     r#   rw   z0ShardedVariableMixin.__init__.<locals>.<genexpr>H  s%     A!C)!,-As   02c              3   $   K   | ]  }|d u 
 y wrV   r+   )ru   
slice_infos     r#   rw   z0ShardedVariableMixin.__init__.<locals>.<genexpr>Z  s     
Dj:T!
Ds   z`SaveSliceInfo` should not be set for all elements in argument `variables`. `ShardedVariable` will infer `SaveSliceInfo` according to the order of the elements `variables`. Received save slice info )r    r!   name	trainablesynchronizationaggregation)"superrn   r1   
_variables_namerq   r   any	TypeErrorr!   r4   r>   _dtyper\   r    rz   sumr   TensorShape_shapeweakrefref_sharded_containerrange_var_offsets_get_save_slice_infor   UninitializedVariabler~   rr   VariableSynchronizationNONEVariableAggregation_saving_variable)r   r   r}   rv   
var_dtypes	first_varhigher_dim_shapes	first_dimrh   isave_slice_info	__class__s              r#   r1   zShardedVariableMixin.__init__  s   * 

.0DODJ y(+L)LL--6K9 
 $--a!''-J-
:09:1qww:;= 
 !I//DK @II!qww045II
!09:1qww:;= 
 AyAAI**	ioo--/33DK  /$[[.a/ ;@I:O56E#ioo./0q0D 1c)n% 
1


AE
"1
%	!a%(8(>(>(F(F(H(K
K :CCAq--/COC

DO
DD& '6%68  2GGkkkkZZ//!$..%==BB!55::D_ . ; J
 ; 	1 Ds6   L	L
;-LL
 %L"	L
L"
L'L"c                 ,    t        | j                        S )zBReturn an iterable for accessing the underlying sharded variables.)iterr   rY   s    r#   __iter__zShardedVariableMixin.__iter__n  s      r%   c           	      T   t        |t              sdt        |t        j                        r|j                  t
        j                  k(  s-t        |t        j                        r5|j                  t        k(  r"t        |       }t        j                  ||      S t        |t        t        f      s|f}|d   }t        |t              r| j                  |      }g }t        | j                         D ]+  \  }}||   ||   f|dd z   }|j#                  ||          - |j$                  |j$                  dk  r|j'                          |s2t)        j*                  g | j,                  d| j.                  dd z         S t        j0                  |d      S |t2        u r2t        j0                  | j                   D cg c]  }||   	 c}d      S |t        j4                  u rFt        j0                  | j                   D cg c]
  }||dd     c}d      t        j4                     S t        |t        j                        rt7        d      |dk  r|| j.                  d   z  }|dk  s|| j.                  d   k\  rt9        d	| d
      t;        t=        | j                               D ]y  }|t=        | j                         dz
  k(  s/|| j>                  |   d   k\  s4|| j>                  |dz      d   k  sM| j                   |   || j>                  |   d   z
  f|dd z      c S  yc c}w c c}w )a  Extracts the specified region as a Tensor from the sharded variable.

    The API contract is identical to `Tensor.__getitem__`. Assignment to the
    sliced range is not yet supported.

    Args:
      slice_spec: The arguments to __getitem__, specifying the global slicing of
        the sharded variable.

    Returns:
      The appropriate slice of tensor based on `slice_spec`.

    Raises:
      IndexError: If a slice index is out of bound.
      TypeError: If `spec_spec` contains Tensor.
    )r	   maskr   Nr3   r&   )r!   r    r"   z:ShardedVariable: using Tensor for indexing is not allowed.zShardedVariable: slice index z of dimension 0 out of bounds.) rq   bool
tensor_libTensorr!   r   npndarray_var_to_tensorr   boolean_masklistr\   slice_decompose_slice_spec	enumerater   appendstepreverser   constantr   r   concatEllipsisnewaxisr   
IndexErrorr   r4   r   )	r   
slice_specr	   sfirst_dim_slice_specsvaluesr   varall_dim_slice_specs	            r#   __getitem__z ShardedVariableMixin.__getitem__r  s   , 	:t$z:#4#45  FKK/z2::.:3C3Ct3Kd#f##6
CCj4-0=j1A!U"88;fdoo. 1&!S #/ 5a 8:Z^K

--./
01 
	
##dkk$QR*@
 	
 f1--	
h&*oo
6s3z?
6Q  
i	*.//
:33z!"~
:  
Az((	)H
 	
 
Q	T[[^	
Q!t{{1~%+A3.LM
 	
 S)* !DOO$q((""1%a((Q1B1B1q51I!1L-L#4$$Q'**,z!"~= 	# 7 ;s   6L :L%c           	      <   t        |j                  t        j                        sHt        |j                  t        j                        s$t        |j
                  t        j                        rt        d      g }|j
                  |j
                  nd}|dk(  rt        d      |j                  }||dkD  rdn| j                  d   dz
  }n|dk  r|| j                  d   z  }|j                  }||dkD  r| j                  d   nd}n|dk  r|| j                  d   z  }|}|dkD  rt        t        | j                              D ]  }| j                  |   d   }|t        | j                        dz
  k  r| j                  |dz      d   n| j                  d   }	||k  r*||t        t        j                  ||z
  |z              z  z  }||	k\  s||k\  r|j                  d       ||z
  }
t!        ||	      |z
  }|j                  t#        |
||              |S t        t        | j                        dz
  dd      D ]  }| j                  |   d   }|t        | j                        dz
  k  r| j                  |dz      d   n| j                  d   }	||	k\  r-||t        t        j                  |	|z
  dz
  |z              z  z  }||k  s||k  r|j                  d       ||z
  }
||k\  r||z
  }nd}|j                  t#        |
||              |j%                          |S )a  Decompose a global slice_spec into a list of per-variable slice_spec.

    `ShardedVariable` only supports first dimension partitioning, thus
    `slice_spec` must be for first dimension.

    Args:
      slice_spec: A python `slice` object that specifies the global slicing.

    Returns:
      A list of python `slice` objects or None specifying the local slicing for
      each component variable. None means no slicing.

    For example, given component variables:
      v0 = [0, 1, 2]
      v1 = [3, 4, 5]
      v2 = [6, 7, 8, 9]

    If `slice_spec` is slice(start=None, stop=None, step=None), we will have:
      v0[returned[0]] = [0, 1, 2]
      v1[returned[1]] = [3, 4, 5]
      v2[returned[2]] = [6, 7, 8, 9]
    If `slice_spec` is slice(start=2, stop=8, step=3), we will have:
      v0[returned[0]] = [2]
      v1[returned[1]] = [5]
      returned[2] == None
    If `slice_spec` is slice(start=9, stop=3, step=-2), we will have:
      returned[0] == None
      v1[returned[1]] = [5]
      v2[returned[2]] = [9, 7]
    zsShardedVariable: using Tensor in slice_spec is not allowed. Please file a feature request with the TensorFlow team.Nr3   r   zslice step cannot be zero)rq   startr   r   stopr   r   r>   r   r   r4   r   ry   mathceilr   r5   r   r   )r   r   r8   
slice_stepslice_start	slice_endcurr   	var_startvar_endr   ends               r#   r   z*ShardedVariableMixin._decompose_slice_spec  s!   @ 	:##Z%6%67jooz'8'89jooz'8'89= 
 F$.OO$?QJQ233""K#aAT[[^a-?k	qT[[^#kI %/N$++a.i	Q4;;q>!i CA~S**+, 7!%%a(+	 3t(()A-- a!e$Q'Q 	
 ?
c$))Y_
,J"KLL
L#'>SI-
--
	/%Iw')3#
--eS*5
67J M+ S**+a/R8 7!%%a(+	 3t(()A-- a!e$Q'Q 	
 '>
c$))Ws]Q->*,L"MNN
N#?cY.
--
	/%)#i'CC
--eS*5
6#7& nnMr%   c                 4    t        d | j                  D         S )Nc              3   p   K   | ].  }t        j                  |j                  |j                         0 y wrV   r   VariableSpecr    r!   rt   s     r#   rw   z2ShardedVariableMixin._type_spec.<locals>.<genexpr>.  s-      

 "..qww@

   46rS   r   rY   s    r#   
_type_speczShardedVariableMixin._type_spec+  s     

__

 r%   c                 \    t        j                         r| j                  gS | j                  S )z?The list of `Variable`s that make up the shards of this object.)r   in_save_contextr   r   rY   s    r#   r   zShardedVariableMixin.variables4  s)     ##%##$$??r%   c                     | j                   S )z0The name of this object. Used for checkpointing.r   rY   s    r#   r}   zShardedVariableMixin.name;  s     ::r%   c                     | j                   S )z,The dtype of all `Variable`s in this object.)r   rY   s    r#   r!   zShardedVariableMixin.dtype@       ;;r%   c                     | j                   S )z7The overall shape, combining all shards along axis `0`.)r   rY   s    r#   r    zShardedVariableMixin.shapeE  r   r%   c           	          t        | j                        D ]P  \  }}|j                  t        j                  || j
                  |   |j                  j                                      R | S rV   )r   r   assignr   r   r   r    rz   )r   r7   use_lockingr}   
read_valuer   rv   s          r#   r   zShardedVariableMixin.assignJ  sT    $//* P1hhyud&7&7&:AGGOO<MNOPKr%   c           	          t        | j                        D ]P  \  }}|j                  t        j                  || j
                  |   |j                  j                                      R | S rV   )r   r   
assign_addr   r   r   r    rz   r   deltar   r}   r   r   rv   s          r#   r   zShardedVariableMixin.assign_addO  W    $//* 1ll
//%!2!21!5qww7H
I Kr%   c           	          t        | j                        D ]P  \  }}|j                  t        j                  || j
                  |   |j                  j                                      R | S rV   )r   r   
assign_subr   r   r   r    rz   r   s          r#   r   zShardedVariableMixin.assign_subV  r   r%   c                 &   |j                   j                  dk7  rt        d|j                          | j                  d   t	        | j
                        z  }| j                  d   t	        | j
                        z  }|gt	        | j
                        z  }t        |      D ]  }||   dz   ||<    | j
                  D cg c]  }|j                   j                         d   ! }}||k7  rt        d      t        j                  ||dz   z  ||z
  |z        }t        j                  ||k  ||dz   z  ||z
  |z        }	t        j                  |t        j                        }t!        j"                  |	|t	        | j
                              }
|
|fS c c}w )zBDecompose a global 1D indices into a list of per-variable indices.r3   zRShardedVariable: indices must be 1D Tensor for sparse operations. Received shape: r   z\scatter_xxx ops are not supported in ShardedVariable that does not conform to "div" sharding)r    rankr>   r   r4   r   r   rz   r   r   maximumr   wherecastr   int32r   dynamic_partition)r   indicesr   extraexpect_first_dimr   rv   actual_first_dimpartition_assignmentslocal_indicesper_var_indicess              r#   _decompose_indicesz'ShardedVariableMixin._decompose_indices]  s   }}Q$]]O- 
 ;;q>S11DKKNS11E vDOO 445\ 4,Q/!3q46:ooF)!,FF++&  %,,D1H%D8 OO%4!8	5D M %MM*?N#55,c$//.BO 111? Gs   >$Fc                 >   | j                  |j                        \  }}t        j                  |j                  |t        | j                              }t        t        | j                              D cg c]  }t        j                  ||   ||         ! c}S c c}w )zDDecompose a global `IndexedSlices` into a list of per-variable ones.)r   r   )
r   r   r   r   r   r4   r   r   indexed_slices_libIndexedSlices)r   r   r   r   per_var_valuesr   s         r#   _decompose_indexed_slicesz.ShardedVariableMixin._decompose_indexed_slices  s    -1-D-D.*O* #444c$//6JN s4??+,	  	((!!$oa.@	
  s   3$Bc                     | j                  |      }t        | j                        D ]1  \  }}d}|dj                  ||      }|j	                  ||   |       3 | S )z#Implements tf.Variable.scatter_add.N
{}/part_{}r}   )r   r   r   formatscatter_addr   sparse_deltar   r}   per_var_sparse_deltar   rv   new_names           r#   r   z ShardedVariableMixin.scatter_add  i    99,G$//* <1h		&&tQ/mm(+(m;	<
 Kr%   c                     | j                  |      }t        | j                        D ]1  \  }}d}|dj                  ||      }|j	                  ||   |       3 | S )z#Implements tf.Variable.scatter_div.Nr   r   )r   r   r   r   scatter_divr   s           r#   r  z ShardedVariableMixin.scatter_div  r   r%   c                     | j                  |      }t        | j                        D ]1  \  }}d}|dj                  ||      }|j	                  ||   |       3 | S )z#Implements tf.Variable.scatter_max.Nr   r   )r   r   r   r   scatter_maxr   s           r#   r  z ShardedVariableMixin.scatter_max  r   r%   c                     | j                  |      }t        | j                        D ]1  \  }}d}|dj                  ||      }|j	                  ||   |       3 | S )z#Implements tf.Variable.scatter_min.Nr   r   )r   r   r   r   scatter_minr   s           r#   r  z ShardedVariableMixin.scatter_min  r   r%   c                     | j                  |      }t        | j                        D ]1  \  }}d}|dj                  ||      }|j	                  ||   |       3 | S )z#Implements tf.Variable.scatter_mul.Nr   r   )r   r   r   r   scatter_mulr   s           r#   r  z ShardedVariableMixin.scatter_mul  r   r%   c                     | j                  |      }t        | j                        D ]1  \  }}d}|dj                  ||      }|j	                  ||   |       3 | S )z#Implements tf.Variable.scatter_sub.Nr   r   )r   r   r   r   scatter_subr   s           r#   r	  z ShardedVariableMixin.scatter_sub  r   r%   c                     | j                  |      }t        | j                        D ]1  \  }}d}|dj                  ||      }|j	                  ||   |       3 | S )z&Implements tf.Variable.scatter_update.Nr   r   )r   r   r   r   scatter_updater   s           r#   r  z#ShardedVariableMixin.scatter_update  sl    99,G$//* ?1h		&&tQ/+A.X>	?
 Kr%   c                     | j                  |      }t        | j                        D ]1  \  }}d}|dj                  ||      }|j	                  ||   |       3 | S )z,Implements tf.Variable.batch_scatter_update.Nr   r   )r   r   r   r   batch_scatter_updater   s           r#   r  z)ShardedVariableMixin.batch_scatter_update  sn    99,G$//* E1h		&&tQ/1!48D	E
 Kr%   c                    | j                  |      \  }}g }t        | j                        D ]@  \  }}d}|dj                  ||      }|j	                  |j                  ||   |             B t        j                  |d      S )z#Implements tf.Variable.sparse_read.Nr   r   r   r   )r   r   r   r   r   sparse_readr   r   )	r   r   r}   r   rh   r8   r   rv   r   s	            r#   r  z ShardedVariableMixin.sparse_read  s    009OQF$//* F1h		&&tQ/mmAMM/!"48MDE	F
 F++r%   c                 J      j                   f fd	}t        j                  |iS )z4Return a `Saveable` for each shard. See `Trackable`.c                 2   g }t        j                  d   j                        }t        |      D cg c]  }d }}j                  D ]  }t        j
                  j                  j                  j                  j                         t        j                  |      |j                  j                               }|j                  t        j                  ||j                  |              |dxx   t        |j                  d         z  cc<    |S c c}w )z5Creates `SaveableObject`s for this `ShardedVariable`.r   )	full_name
full_shape
var_offset	var_shape)r4   r   r    r   rr   rs   SaveSliceInfor}   rz   copyr   r   ResourceVariableSaveablespecry   )r}   	saveablesr6   rh   r  rv   r   r   s          r#   _saveable_factoryzPShardedVariableMixin._gather_saveables_for_checkpoint.<locals>._saveable_factory  s    i#))*d$T{+!A+j+ )!'00>>iizz))+yy,ggoo'	 ? 
 	 99?''	

 	1QWWQZ()  ,s   	D)r}   	trackableVARIABLE_VALUE_KEY)r   r  s   ` r#    _gather_saveables_for_checkpointz5ShardedVariableMixin._gather_saveables_for_checkpoint  s%      $yy ( ((*;<<r%   c                      y)1For implementing `Trackable` async checkpointing.Nr+   )r   
object_maps     r#   _copy_trackable_to_cpuz+ShardedVariableMixin._copy_trackable_to_cpu  s    r%   c                     g }| j                   | j                  gz   D ]&  }|j                   |j                  |||fi |       ( t	        || j                     g| j
                        || <   |S )z/For implementing `Trackable` SavedModel export.r   )r   r   extend_export_to_saved_model_graphrX   r}   )r   r!  
tensor_mapoptionskwargsresource_listrv   s          r#   r%  z1ShardedVariableMixin._export_to_saved_model_graph  s     M__ 5 566 
(!
(
(*g17 '	D))	*+$))Jt r%   c                 T    | j                   d   j                  j                  dd      S )Nr   part_0sharded)r   
_unique_idreplacerY   s    r#   r-  zShardedVariableMixin._unique_id,  s&     >>!''//)DDr%   c                 4    | j                   d   j                  S Nr   )r   _distribute_strategyrY   s    r#   r1  z)ShardedVariableMixin._distribute_strategy1  s    >>!111r%   c                     | j                   S rV   r   rY   s    r#   _shared_namez!ShardedVariableMixin._shared_name5  s    ::r%   c                      y)NTr+   rY   s    r#   is_sharded_variablez(ShardedVariableMixin.is_sharded_variable9      r%   c                 4    t        |       j                         S )a-  Copies the values in this ShardedVariable to a NumPy array.

    First converts to a single Tensor using the registered conversion function,
    which concatenates the shards, then uses Tensor.numpy() to convert to
    a NumPy array.

    Returns:
      A NumPy array of the same shape and dtype.
    )r   numpyrY   s    r#   r8  zShardedVariableMixin.numpy=  s     $%%''r%   rW   )NNT)FNT)FNrV   )%r'   r(   r)   r*   r1   r   r   r   rk   r   r   r}   r!   r    r   r   r   r   r   r   r  r  r  r  r	  r  r  r  r  r"  r%  r-  r1  r3  r5  r8  __classcell__)r   s   @r#   rn   rn     s1   "
Yv!KZjX          
.2`$	,=2<  E E 2 2    
(r%   rn   z'__internal__.distribute.ShardedVariablec                   X    e Zd ZdZed        Zed        Zed        Zd Z	d Z
d Zd Zy	)
rX   a  A container for `Variables` that should be treated as shards.

  Variables that are too large to fit on a single device (e.g., large
  embeddings)
  may need to be sharded over multiple devices. This class maintains a list of
  smaller variables that can be independently stored on separate devices (eg,
  multiple parameter servers), and saves and restores those variables as if they
  were a single larger variable.

  Objects of this class can be saved with a given number of shards and then
  restored from a checkpoint into a different number of shards.

  Objects of this class can be saved to SavedModel format using
  `tf.saved_model.save`. The SavedModel can be used by programs like TF serving
  APIs. It is not yet supported to load the SavedModel with
  `tf.saved_model.load`.

  Since `ShardedVariable` can be saved and then restored to different number of
  shards depending on the restore environments, for example, TF serving APIs
  would restore to one shard for serving efficiency, when using
  `ShardedVariable` in a tf.function, one should generally not assume it has the
  same number of shards across save and load.

  Sharding is only supported along the first dimension.

  >>> class Model(tf.Module):
  ...   def __init__(self):
  ...     self.sharded_variable = ShardedVariable([
  ...       tf.Variable([3.0], dtype=tf.float32),
  ...       tf.Variable([2.0], dtype=tf.float32)
  ...     ])
  ...
  ...   @tf.function(input_signature=[tf.TensorSpec([], dtype=tf.int32)])
  ...   def fn(self, x):
  ...     return tf.nn.embedding_lookup(self.sharded_variable.variables, x)
  ...
  ...   @tf.function(input_signature=[tf.TensorSpec([], dtype=tf.int32)])
  ...   def serve_fn(self, x):
  ...     return tf.nn.embedding_lookup(self.sharded_variable.variables, x)
  >>>
  >>> model = Model()
  >>> model.fn(1).numpy()
  2.0
  >>> tf.saved_model.save(model, export_dir='/tmp/saved_model',
  ...   signatures=model.serve_fn)
  c                 4    t        d | j                  D         S )Nc              3   p   K   | ].  }t        j                  |j                  |j                         0 y wrV   r   rt   s     r#   rw   z-ShardedVariable._type_spec.<locals>.<genexpr>~  s-      
$  
,
,QWWagg
> 
$r   r   rY   s    r#   r   zShardedVariable._type_spec{  s     
$??
$% %r%   c                 p    t         j                  j                  D ]  }|dk(  r	| j                  |        y)z%Register overloads for all operators.r   N)r   r   OVERLOADABLE_OPERATORS_overload_operator)clsoperators     r#   _overload_all_operatorsz'ShardedVariable._overload_all_operators  s8     %%<< '	]	"	X&	'r%   c                 ^    t        t        j                  |      fd}t        | ||       y)z5Delegate an operator overload to `tensor_lib.Tensor`.c                 .     t        |       g|i |S rV   )r   )rv   argsr(  tensor_operators      r#   	_operatorz5ShardedVariable._overload_operator.<locals>._operator  s    ^A.@@@@r%   N)getattrr   r   setattr)r@  rA  rG  rF  s      @r#   r?  z"ShardedVariable._overload_operator  s+     j//:OA C9%r%   c                      y rV   r+   )r   concrete_functioninternal_captures      r#   #__tf_experimental_restore_capture__z3ShardedVariable.__tf_experimental_restore_capture__  s     r%   c                      y)z6Pass resource_variable_ops.is_resource_variable check.Tr+   rY   s    r#    _should_act_as_resource_variablez0ShardedVariable._should_act_as_resource_variable  r6  r%   c                 J    t        j                  | j                  ||d       y )NF)enforce_naming)r   (write_object_proto_for_resource_variabler   )r   protor'  s      r#   _write_object_protoz#ShardedVariable._write_object_proto  s    BBuger%   c                     | |v r#| j                   D ]  }|j                  |        yg }| j                   D ]'  }|j                  |       |j                  ||          ) t        || j                        }||| <   y)r   r   N)r   r"  r   rX   r}   )r   r!  rv   copied_varsnew_vars        r#   r"  z&ShardedVariable._copy_trackable_to_cpu  s    z -!	  ,- k *!	  ,:a=)*  $))<g jr%   N)r'   r(   r)   r*   rk   r   classmethodrB  r?  rM  rO  rT  r"  r+   r%   r#   rX   rX   J  sX    -^ % %
 ' ' & &
!r%   rX   c                 N   ~|T|j                  | j                        s9t        dj                  |j                  | j                  j                              |rt        d      dt        j                         v rt        d      t        j                  | j                  d      S )z+Converts a `ShardedVariable` to a `Tensor`.zMIncompatible type conversion requested to type {!r} for variable of type {!r}z:ShardedVariable doesn't support being used as a reference.embedding_lookupzKConverting ShardedVariable to tensor in embedding lookup ops is disallowed.r   r   )is_compatible_withr!   r>   r   r}   r   r   get_name_scoper   r   r   r   )r   r!   r}   as_refs       r#   r   r     s    

u77		B
	vejj#))..9  
D   3--//
	  
		#--a	00r%   c                 t    t        | t              r| d   } t        j                  | j                  |||||      S r0  )rq   r   r   rZ  r   )paramsidspartition_strategyr}   validate_indicesmax_norms         r#   rZ  rZ    sB     AYF		'	'	

 r%   embedding_weightsc	                 R    t        j                  | j                  ||||||||	      S )z.Pass the individual shard variables as a list.)sparse_weightscombiner
default_idr}   ra  rc  allow_fast_lookup)r   safe_embedding_lookup_sparser   )	rd  
sparse_idsrf  rg  rh  r}   ra  rc  ri  s	            r#   rj  rj    s8     
	3	3!!#+)

 
r%   )NNF)modNTN)NmeanNNdivNF):r*   r  r   typingr   r   r8  r   tensorflow.python.frameworkr   r   r   r   r   r   r	   r   r
   r   r   tensorflow.python.opsr   r   r   r   r   r   r   rr   tensorflow.python.saved_modelr   tensorflow.python.trackabler   r  !tensorflow.python.training.savingr   tensorflow.python.utilr    tensorflow.python.util.tf_exportr   objectr   r-   r;   rL   TypeSpecrS   	Trackablern   CompositeTensorrX   r   #register_tensor_conversion_functionrB  dispatch_for_typesrZ  dispatch_for_apirj  r+   r%   r#   <module>r~     s         8 3 . L + < B 4 1 + / / * 7 7 < 6 9 B + 6 ="E!& ! F!H HRP[  Q< DL9 9 M9x DL? ? M?D),, 6z(9.. z(z 4<i!*,<,L,L i! =i!X1F ?  > >^  ' ' ) ];;_M 	 N, =EEF 	& Gr%   