
    AVhj                     z   d Z ddlZddlZddlmZmZmZmZ ddlm	Z	 ddlm
Z ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dZ d Z!d Z"d Z# G d de$      Z% G d de$      Z&ejN                  fdZ(d Z)d Z*d Z+d  Z,d! Z-d" Z.d# Z/y)$zUtilities for cross_device_ops.    N)CallableListOptionalUnion)collective_util)values)backprop_util)context)dtypes)indexed_slices)ops)tensor_spec)	array_ops)collective_ops)cond)math_ops)nccl_ops)resource_variable_ops)
tf_logging)cored   c           
         g }t        |  D ]`  }|D cg c]  \  }}|	 }}}t        j                  |      }|j                  t        ||      D cg c]  \  }\  }}||f c}}}       b t	        t        |       }|S c c}}w c c}}}w )z)Aggregate gradients using nccl allreduce.)zipr   all_sumappendlist)replica_gradsagg_all_g_and_vsingle_g_and_vg_single_grads	agg_gradsvs           _/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/distribute/cross_device_utils.pyaggregate_gradients_using_ncclr&   )   s    /]+ Bn"01$!QA1L1  .I!$Y!?@@IAv1!Q@BB o.//	 2 	As   BBc           
         g }t        |       }|dz  }t        t        |       D ]  \  }}||z  }||z   |z  }||k  rd}	|}
n|}	d}
||	|	|z    }t        j                  | |         5  t        |dd      \  }}ddd       ||
|
|z    }t        j                  | |         5  t        |dd      \  }}ddd       t        j                  | |         5  t        gdd      \  \  }}}ddd       t        j                  | |         5  t        j                        }ddd       t        j                  | |         5  t        j                        }ddd       g }t        t        |            D ]W  }t        j                  | |         5  ||k  ||k  k(  r}n}|j                  t        j                  |             ddd       Y |j                  t        ||      D cg c]  \  }\  }}||f c}}}        t        t        |       }|S # 1 sw Y   xY w# 1 sw Y   txY w# 1 sw Y   KxY w# 1 sw Y   "xY w# 1 sw Y   xY w# 1 sw Y   xY wc c}}}w )a  Aggregate gradients using hierarchical copies.

  Args:
    avail_devices: available GPU devices.
    replica_grads: List of lists of (gradient, variable) tuples. The outer list
      is over replicas. The inner list is over individual gradients.

  Returns:
    The list of (aggregated_gradient, variable), where the gradient has been
      summed across all replicas and the variable is chosen from the first
      replica.
     r   FN)len	enumerater   r   device$aggregate_single_gradient_using_copyr   identityranger   r   )avail_devicesr   r#   num_devices
group_sizeir"   group_0_main_devicegroup_1_main_devicegroup_0_begingroup_1_begingroup_0_device_gradsgroup_0_agg_gradsr!   group_1_device_gradsgroup_1_agg_gradsagg_total_gradsgroup_0_agg_grads_bcastgroup_1_agg_grads_bcastagg_grads_bcastjsrc_device_gradr    r$   s                           r%   +aggregate_gradients_using_hierarchical_copyrA   7   s   0 )M"+ a*"3#67 .Foak/.;{JZ'm m mm ((5
(BD	M"56	7 .A
u..
 ((5
(BD	M"56	7 .A
u..
 
M"56	7 @ D/
0%!@A@
 
M"56	7 D ) 2 2? CD	M"56	7 D ) 2 2? CD O3|$% D::mA&' D*,!j.A3/3/y11/BCD DD !$_l!CDDIAv1!QDF[.F` 3	?#)	K. .. .
@ @
D DD D
D D 	EsN   &HH"H/H<<I	5I%I!H	"H,	/H9	<I		I	Ic                 f   | D cg c]  \  }}|	 }}}t        j                  |      }|r0t        |      dkD  r"t        j                  |dt        |      z        }| d   d   }|rAt        j
                  t        j                  t        j                  |                  }||f|fS ||fdfS c c}}w )a&  Calculate the average gradient for a shared variable across all replicas.

  Note that this function provides a synchronization point across all replicas.

  Args:
    grad_and_vars: A list or tuple of (gradient, variable) tuples. Each
      (gradient, variable) pair within the outer list represents the gradient
      of the variable calculated for a single replica, and the number of pairs
      equals the number of replicas.
    use_mean: if True, mean is taken, else sum of gradients is taken.
    check_inf_nan: check grads for nans and infs.

  Returns:
    The tuple ([(average_gradient, variable),], has_nan_or_inf) where the
      gradient has been averaged across all replicas. The variable is chosen
      from the first replica. The has_nan_or_inf indicates the grads has nan or
      inf.
     g      ?r   N)r   add_nr)   r   multiplylogical_not
reduce_all	is_finite)	grad_and_varsuse_meancheck_inf_nanr    r!   gradsgradr$   has_nan_or_infs	            r%   r,   r,      s    ( '
'A1
'%
'		$#e*q.dC#e*$45DAq!**Y0078:N!9n$$!9d? (s   B-c                   0    e Zd ZdZddZd Zd Zd Zd Zy)	CollectiveKeysa  Class that manages collective keys.

  We need to manage three different keys for collective:

  *Group key*: an integer key to identify the set of cooperative devices.
  Collective ops work under the same set of devices must using the same group
  key.

  *Instance key*: an integer key to identify the set of same counterpart of
  tensors on different devices in a device group that need to be all-reduced.

  This class is thread safe.
  c                 `    || _         i | _        t        j                         | _        i | _        y)zaInitializes the object.

    Args:
      group_key_start: the starting integer of group key.
    N)
_group_key_instance_key_table	threadingLock_lock_known_groups)selfgroup_key_starts     r%   __init__zCollectiveKeys.__init__   s)     &DO!D!DJD    c                     | j                   5  dj                  |      }|| j                  vr| j                  |      | j                  |<   | j                  |   cddd       S # 1 sw Y   yxY w)a   Returns a group key for the list of local devices.

    The same group key is returned if the list of local devices is the same.

    Args:
      devices: a list of local canonical device strings in a collective group.

    Returns:
      a group key.
    ,N)rV   joinrW   _get_new_group_key)rX   devicesdevices_keys      r%   get_group_keyzCollectiveKeys.get_group_key   se     
 -HHW%k	D..	.*.*A*A'*J;',	- - -s   AA##A,c                     | j                   }| xj                   dz  c_         i | j                  |<   |D ]  }t        | j                  |   |<    |S )aJ  Returns a new group key.

    The caller should store and reuse the same group key for the same set of
    devices. Calling this method always returns a new group key.

    This method is not thread-safe.

    Args:
      devices: a list of canonical device strings in a collective group.

    Returns:
      a new group key.
    rC   )rR   rS   INSTANCE_KEY_START_NUMBER)rX   r`   new_keyr+   s       r%   r_   z!CollectiveKeys._get_new_group_key   sV     ooGOOqO(*DW% L2Kdw'/LNr[   c                     | j                   5  | j                  j                  |d      }|t        d| d      ||vrt        d| d|       ||   }||xx   dz  cc<   |cddd       S # 1 sw Y   yxY w)a  Returns a new instance key for use in defining a collective op.

    You should call this once per each collective op of a collective instance.

    Args:
      group_key: the group key returned by get_group_key(). You should not
        assign the group key yourself.
      device: a canonical device string. It should be the device this collective
        op is on.

    Returns:
      a new instance key.

    Raises:
      ValueError: when the group key is invalid or the device is not in the
      group.
    NzGroup z is not found.zDevice z is not present in group rC   )rV   rS   get
ValueError)rX   	group_keyr+   groupr$   s        r%   get_instance_keyzCollectiveKeys.get_instance_key   s    $ 
 &&**9d;e	6)N;<<	u	76(*CI;OPP
-aFmqm  s   AA--A6c                     t               }| j                  |_        t        j                  | j                  |      |_        |S N)rP   rR   copydeepcopyrS   )rX   memocopieds      r%   __deepcopy__zCollectiveKeys.__deepcopy__	  s6     FF!%t/G/G!NFMr[   N)rC   )	__name__
__module____qualname____doc__rZ   rb   r_   rk   rr    r[   r%   rP   rP      s     	-"*8r[   rP   c            
          e Zd ZdZdZdZdedededede	j                  f
dZd	eej                  ej                   f   fd
Zd Zd Zd Zd Zd Z	 	 ddej                  d	eeej                  ej                   f      dee	j                     dej0                  fdZdej                  dee	j                     dej0                  fdZ	 ddeeej                        dee	j                     dej0                  fdZ	 ddej                  dej                  dee	j                     dej0                  fdZ	 ddej>                  dee	j                     dej>                  fdZ y)CollectiveReplicaLauncherz"Launch collectives on one replica.Tri   r1   collective_keysr+   optionsc                 T   || _         || _        || _        || _        || _        | j                         rVt        j                         5  t        j                  |      5  t        j                  d      | _        d d d        d d d        y d | _        y # 1 sw Y   xY w# 1 sw Y   y xY w)Ng        )rR   _group_size_collective_keys_device_options_use_ordering_tokenr   
init_scoper+   r   ResourceVariable_ordering_token)rX   ri   r1   rz   r+   r{   s         r%   rZ   z"CollectiveReplicaLauncher.__init__  s      DO!D+DDLDM!>> JSZZ/ J4EEbIJ J J "dJ J J Js$   BB9BB	BB'control_inputc                 z    |&| j                         st        j                  |g      S t        j                         S rm   )r   r   control_dependenciesNullContextmanager)rX   r   s     r%   _control_inputz(CollectiveReplicaLauncher._control_input&  s5     )A)A)C%%}o66!!##r[   c                 L    t        j                         syt        j                  S NF)r   #executing_eagerly_outside_functionsry   _prefer_unique_instance_keyrX   s    r%   _use_unique_instance_keyz2CollectiveReplicaLauncher._use_unique_instance_key,  s    224$@@@r[   c                 L    t        j                         syt        j                  S r   )r   r   ry   _prefer_ordering_tokenr   s    r%   r   z-CollectiveReplicaLauncher._use_ordering_token1  s     224$;;;r[   c                    | j                         r't        j                         }t        |dd      r|j                  }t        |dd      rt        j                         sd|j                  rX|j                         5  |j                  | j                  t        j                  g t        j                              cddd       S | j                  j!                  | j"                  | j$                        }t        j&                  d      5  t        j(                  |t        j                        cddd       S | j                  j!                  | j"                  | j$                        S # 1 sw Y   yxY w# 1 sw Y   yxY w)zReturns the next instance key.is_control_flow_graphFNzCPU:0)dtype)r   r   get_default_graphgetattrouter_graphr
   executing_eagerlybuilding_function
as_defaultcapture_call_time_value_next_instance_keyr   
TensorSpecr   int32r~   rk   rR   r   r+   convert_to_tensor)rX   graphinstance_keys      r%   r   z,CollectiveReplicaLauncher._next_instance_key8  sA   $$& ##%e E2E:!! E2E:&&(U-D-D 	Q ..%%{'='=b&,,'OQ	Q 	Q ,,==OOT\\+ZZ  	I&&|6<<H	I 	I ""33DOO48LLB B	Q 	Q	I 	Is   =>E)
%E5)E25E>c                 P    | j                         r| j                  j                  S y rm   )r   r   handler   s    r%   _get_ordering_tokenz-CollectiveReplicaLauncher._get_ordering_tokenW  s%    !!!((( "r[   c                 "    | j                         S )z0Whether this launcher can order NCCL operations.)r   r   s    r%   can_order_ncclz(CollectiveReplicaLauncher.can_order_nccl[  s    ##%%r[   Ninput_tensorreturnc                    | j                         }| j                  j                  |      }| j                         }t	        j
                  | j                        5  | j                  |      5  t        j                  || j                  | j                  ||j                  j                  |j                  |      cddd       cddd       S # 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)a  All-reduce a dense tensor.

    Args:
      input_tensor: a dense tensor. It must have the same shape on all replicas.
      control_input: if not None, add control edges between control_input and
        the all-reduce.
      options: an optional tf.distribute.experimental.CommunicationOptions. If
        provided, it overrides the default options.

    Returns:
      The reduced tensor.
    communication_hinttimeoutordering_tokenN)r   r   merger   r   r+   r   r   r   all_reduce_v2r}   rR   implementationvaluetimeout_seconds)rX   r   r   r{   r   r   s         r%   
all_reducez$CollectiveReplicaLauncher.all_reduce_  s    " **,Lmm!!'*G--/N	DLL	! 	)			]	+	)))




//
$3399))')	) 	) 	) 	) 	) 	) 	)s%   C#-AC;	C#C	C##C,c           
      ~   | j                         }| j                  j                  |      }| j                         }t	        j
                  | j                        5  t        j                  || j                  | j                  ||j                  j                  |j                  |      cddd       S # 1 sw Y   yxY w)a&  All-gather a dense tensor.

    Args:
      input_tensor: a dense tensor. It must have the same shape on all replicas.
      options: an optional tf.distribute.experimental.CommunicationOptions. If
        provided, it overrides the default options.

    Returns:
      The reduced tensor.
    r   N)r   r   r   r   r   r+   r   r   all_gather_v2r}   rR   r   r   r   )rX   r   r{   r   r   s        r%   _all_gatherz%CollectiveReplicaLauncher._all_gather~  s     **,Lmm!!'*G--/N	DLL	! )))




//
$3399))')) ) )s   AB33B<input_tensor_packsc           	      |   | j                   j                  |      }g }|D ]|  }t        j                         r*|D ]$  }|j	                  | j                  |d|             & Bt        j                  | j                        5  |D cg c]  }t        j                  |dg       }}|D cg c]  }t        j                  |       }}|j                  t        j                  j                  k(  r|r|d   }	nd}	| j                  t        j                   |d      |	|      }
|D cg c]  }t#        j$                  |       }}t        j&                  |
|d      }t)        ||      D ]*  \  }}|j	                  t        j                  ||             , 	 ddd        |S c c}w c c}w c c}w # 1 sw Y   xY w)a  Batch all-reduce dense tensors.

    This takes a list of batches of tensors. Using multiple batches have the
    benefit that it doesn't need to wait for all inputs to be ready to start the
    all-reduce.

    Args:
      input_tensor_packs: a list of lists of dense tensors.
      options: an optional tf.distribute.experimental.CommunicationOptions. If
        provided, it overrides the default options.

    Returns:
      A flat list of reduced tensors.
    Nr   axis)r   r   r
   r   r   r   r   r+   r   r   reshapeshaper   r   CommunicationImplementationNCCLconcatr   reduce_prodsplitr   )rX   r   r{   outputspackr   tflat_tensorsshapesr   reducedsnum_elementsflat_outputsr   flat_outputs                   r%   batch_all_reducez*CollectiveReplicaLauncher.batch_all_reduce  s   $ mm!!'*GG" B		"	"	$ ! 	GL
..tWE
F	G
 ZZ% 	B>BC)++At4C,C0451IOOA&5&5$$ <<AABFM#BKM MOO|!4mWN';ABa(..q1B,B",QG,$'$= B e[NN9,,[%@AB	B 	BB0 N D5 C	B 	Bs8   F1F"$F1*F'AF1%F,AF1"F11F;	r   c                    t        j                         rt        d      t        j                  | j
                        5  t        j                  t        j                  |      g      5  t        j                  |gt        j                  |      t        j                  |dz   t        j                  |            fd      }t        j                  ||      }| j                  t        j                  t        j                   |      d      |      }|dddf   }t        j"                  |      }t%        ||      }	| j                  |	|      }
g }t        | j&                        D ]!  }||z  }|j)                  |
||||   z           # t        j                  |d      }t        j                  t        j                  d|dz         dgt        j                  |dz   t        j                  |            fd      }t        j                  ||      cddd       cddd       S # 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)aJ  All-gather a dense tensor.

    This method must be called inside a tf.function.

    Args:
      input_tensor: a dense tensor. It must have the same rank on all replicas,
        and dimensions other than `axis` need to be the same as well.
      axis: 0-D int32 Tensor. Dimension along which to gather. Must be in the
        range [0, rank(value)).
      options: an optional tf.distribute.experimental.CommunicationOptions. If
        provided, it overrides the default options.

    Returns:
      The gathered Tensor.

    Raises:
      RuntimeError: if called in eager mode.
    z*all_gather is not supported in eager mode.rC   r   r   )permN)r
   r   RuntimeErrorr   r+   r   r   r   r-   r   r   r.   rank	transposer   expand_dims_v2shape_v2
reduce_max	_pad_utilr}   r   )rX   r   r   r{   perm_preinput_tensor_tgathered_shape
first_dimsfull_axis_dimpadded_input_tensorgather_padded_out_tensorsplit_tensorsr2   	start_posout_tensor_t
perm_afters                   r%   
all_gatherz$CollectiveReplicaLauncher.all_gather  s   .   "EFF	DLL	! #@		!	!9#5#5l#C"D	E#@ !!68>>$'>>$(INN<$@ACh !**<hGn''

"
"9#5#5n#EA
N
n "!Q$'j))*5m%nmD "&!1!12Ew!OmT%%& F!%	5i	6@mAD E 	FF %%mQ7l ##>>!TAX&>>$(INN>$BCEj   J?G#@ #@ #@ #@ #@ #@ #@s$   *H:)F)H%	H:%H.	*H::Iinput_slicesc           	           j                   j                        t        j                   j                        5  dt
        t        j                  t        t        j                     gt        j                  f   dt        j                  ffdt        j                  j                         } j#                  |      dt        j                  dt        t        j                     dt        j                  f fdt%        j$                  t'        j(                  t'        j*                        t'        j,                               fdfd      cd	d	d	       S # 1 sw Y   y	xY w)
a9  All-reduce an IndexedSlices.

    This method can be called outside  tf.function.

    Args:
      input_slices: an IndexedSlices.
      options: an optional tf.distribute.experimental.CommunicationOptions. If
        provided, it overrides the default options.

    Returns:
      The reduced IndexedSlices.
    all_gather_fnr   c                 B    | j                         }j                  t        j                  j                  k(  r|g}ng }t        j                  |      5   | j                        }ddd       t        j                  |j                        S # 1 sw Y   +xY w)z/Use all_gather_fn to aggregate `IndexedSlices`.N)r   indicesdense_shape)r   r   r   r   r   r   r   r   r   IndexedSlicesr   )r   
all_valuescontrolall_indicesr   r{   s       r%   all_gather_indexed_sliceszVCollectiveReplicaLauncher.all_reduce_indexed_slices.<locals>.all_gather_indexed_slices   s    
 #<#6#6@
""77<<=L''%%g. 	E%l&:&:GD+	E++$002 	2	E 	Es   BBr   r{   c                    t        j                        }t        | |      }	j                  ||      }g }t	        	j
                        D ]!  }||z  }|j                  ||||   z           # t        j                  |d      S )z4all_gather tensors of different sizes using padding.r   )	r   r   r   r   r.   r}   r   r   r   )
r   r{   
max_lengthpadded_tensorall_padded_tensorsr   r2   r   all_lengthsrX   s
           r%   all_gather_with_paddingzTCollectiveReplicaLauncher.all_reduce_indexed_slices.<locals>.all_gather_with_padding6  s     ((5
!,
;!--mWEt''( 	CA*n)


1)I2=a.=A  B C	C q11r[   c                  (      j                         S rm   )r   )r   rX   s   r%   <lambda>zECollectiveReplicaLauncher.all_reduce_indexed_slices.<locals>.<lambda>H  s    +D,<,<= r[   c                              S rm   rw   )r   r   s   r%   r   zECollectiveReplicaLauncher.all_reduce_indexed_slices.<locals>.<lambda>I  s    +,CD r[   N)r   r   r   r+   r   r   r   
TensorLiker   r   OptionsTensorr   r   r   r   r   r   r   r   equalr   
reduce_min)rX   r   r{   lengthr   r   r   s   ``` @@@r%   all_reduce_indexed_slicesz3CollectiveReplicaLauncher.all_reduce_indexed_slices  s   2 mm!!'*G	DLL	! +F2!)@)@ ABDKKOQ2 ''2& |334f$$VW5k22O33429=2 YY
..!!+.!!+.0 >
DFM+F +F +Fs   DE((E1)NNrm   )!rs   rt   ru   rv   r   r   intrP   strr   r   rZ   r   r   r   r   	Operationr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rw   r[   r%   ry   ry     s   * $" " " ."8;"'//"$%031> +? $A
<B>)& HL37	)OO) eDOOS]]$BCD) //0	) =AKK	)>)doo )#O$;$;<)AE)8 48,tDOO45, //0, =AKK,d 48	=@OO=@ OO=@ //0	=@ =AKK	=@D 48EF"00EF //0EF ##	EFr[   ry   c                 `    t        d | D              rt        j                  |       S  ||       S )zGAggregate tensors using `accumulation_fn` and IndexedSlices via concat.c              3   P   K   | ]  }t        |t        j                           y wrm   
isinstancer   r   .0r$   s     r%   	<genexpr>z6aggregate_tensors_or_indexed_slices.<locals>.<genexpr>N  s     EA~33	4E   $&)anyr	   AggregateIndexedSlicesGradients)r   accumulation_fns     r%   #aggregate_tensors_or_indexed_slicesr  L  s,    EfEE88@@6""r[   c                     t        | t        j                        rMt        j                  |       } t        j                  | j
                  |z  | j                  | j                        S | |z  S rm   )r  r   r   r	   FlattenNestedIndexedSlicesr   r   r   )r   ns     r%   %divide_by_n_tensors_or_indexed_slicesr  T  s[    ~33444U;E''q(8%--(-(9(9; ; 19r[   c                    t        j                  |      5  t        | t        j                        rt        j                  | j                        }t        j                  | j                        }| j                   t        j                  | j                        }nd}t        j                  |||      }nt        j                  |       }ddd       |S # 1 sw Y   S xY w)z-Copies a tensor or IndexedSlices to a device.N)
r   r+   r  r   r   r   r-   r   r   r   )r   r+   copied_valuescopied_indicescopied_shaperesults         r%   'copy_tensor_or_indexed_slices_to_devicer  ]  s    
zz& )%556((6m ))%--8n				& ))%*;*;<++M>,8:f !!%(f) 
-) 
-s   B4CCc                     t        | t        j                        ryt        | t        j                        rt        d | j                  D              S y)NTc              3   P   K   | ]  }t        |t        j                           y wrm   r  r  s     r%   r  z$is_indexed_slices.<locals>.<genexpr>r  s$      K89
1n223Kr  F)r  r   r   	value_libDistributedValuesallr   )r   s    r%   is_indexed_slicesr  n  sJ    ~334y223 K=B\\K K K	r[   c                     g }g }g }g }t        |       D ]U  \  }}t        |      r#|j                  |       |j                  |       4|j                  |       |j                  |       W ||||fS )a  Split values into dense and sparse values.

  Args:
    values: a list of tensors or `PerReplica`s.

  Returns:
    Four lists:
      a list of dense values, a list of their indices in `values` and
      a list of sparse values, a list of their indices in `values`.
  )r*   r  r   )r   dense_valuesdense_indicessparse_valuessparse_indicesr2   r$   s          r%   split_by_sparsityr"  w  s     ,--. da1A!1 
}m^	CCr[   c                     d}| D ]  }|t        |d         z  } dg|z  }| D ]'  }|s|d   st        | D ]  \  }}||   J |||<    ) |S )zStitch values together according to their indices.

  Args:
    values_and_indices_list: a list of tuples of values and indices indicating
      the values and positions in the returned list.

  Returns:
    a stitched list of values.
  r   N)r)   r   )values_and_indices_listr   values_and_indicesr  r$   r2   s         r%   stitch_valuesr&    s     &3 )
c$Q'((F) 6F?&3 03)* $!Qay   q	
 
-r[   c                 2   |dk(  r| gS g }d}| D ]  }|j                   j                         }|t        j                  d|       | gc S ||j                  j
                  z  }|r||kD  r|j                  g        d}|d   j                  |       ||z  } |S )a  Groups `input_tensors` into chunks of `bytes_per_pack`.

  The method preserves the original order of `input_tensors`. The grouping is
  best effort, each pack could have more or less bytes than `bytes_per_pack`.
  It only groups values with known shape.

  Args:
    input_tensors: a list of Tensor.
    bytes_per_pack: an integer.

  Returns:
    A list of packs of Tensor. All values are grouped into one pack if
    `bytes_per_pack` is zero or any of the value has unknown shape.
  r   zAnot packing values due to the unknown or inconsistent shape of %sr   )r   r   loggingwarningr   sizer   )input_tensorsbytes_per_packpackslast_pack_sizer   r   r*  s          r%   group_by_sizer/    s      q?
%. e;;++-Loo
M
 _%++***D N^3ll2n	"IUdN!" 
,r[   c                    |t        j                  |       d   z
  }t        j                  |       }d|gg}t        j                  |t        j                  |dz
  dft
        j                        gd      }t        j                  | |      }|S )z?Pad the `input_tensor`'s first dimension to be `full_axis_dim`.r   rC   r(   )r   r   r   )r   r   r   r   zerosr   r   pad)r   r   missing_axis_dimtensor_rankpaddings_axispaddingsr   s          r%   r   r     s    "Y%7%7%Ea%HH|,+'()-oo[1_a0E $%	&(
 "lH=	r[   )0rv   rn   rT   typingr   r   r   r   tensorflow.python.distributer   r   r  tensorflow.python.eagerr	   r
   tensorflow.python.frameworkr   r   r   r   tensorflow.python.opsr   r   r   r   r   r   tensorflow.python.platformr   r(  tensorflow.python.typesr   rd   r&   rA   r,   objectrP   ry   rD   r  r  r  r  r"  r&  r/  r   rw   r[   r%   <module>r?     s    &   2 2 8 < 1 + . 6 + 3 + 0 & * * 7 < ( Od HbV bJwF wFt	 AI #"D2.%Pr[   