
    AVh#                        d Z ddlZddlmZmZmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ  edg       dej4                  deeej8                  f   deee   ej8                  f   deee   ej8                  f   deeej8                  ej:                  f      f
d       Z edg       deej:                     fd       Z edg       dej4                  dedeeeej8                  ej:                  f   f   fd       Z  edg       dej4                  deeej8                  f   deeeej8                  ej:                  f   f   fd        Z!y)!z=Contains functionaility for Checkpoint/SavedModel in DTensor.    N)DictListUnion)api)
d_variable)gen_dtensor_ops)layout)	mesh_util)context)errors_impl)ops)tensor)io_ops)	variables)	tf_exportz!experimental.dtensor.sharded_save)v1meshfile_prefixtensor_namesshape_and_slicestensorsc                    t        j                  t        j                               5  t	        j
                  ||||       ddd       t        j                  | j                         d       t        j                  | j                               5  t	        j                  |g|d      }ddd       t        j                  | j                         d       S # 1 sw Y   xY w# 1 sw Y   ;xY w)a  Saves given named tensor slices in a sharded, multi-client safe fashion.

  The method makes sure the checkpoint directory state is correct in a sharded
  mutli-client saving. Namely, we place a barrier after SaveV2 to make sure
  every client has done writing the files. And another one after
  MergeV2Checkpoints to make sure all Metadata is properly merged.

  Upon existing, the checkpoint is completed and the all directory operations
  are done.

  Args:
    mesh: The Mesh that contains the Tensors to save.
    file_prefix: The prefix of checkpoint.
    tensor_names: a list of tensor names used in save op.
    shape_and_slices: a list of shape and slice specification used in save op.
      The only supported value is "" as we don't support distributed saving with
      slices yet.
    tensors: a list of tensors used in save op. The order should match
      tensor_names.

  Returns:
    A MergeV2Checkpoints op that merged all Metadata.
  NSaveV2T)checkpoint_prefixesdestination_prefixdelete_old_dirsMergeV2Checkpoints)r   devicer   device_namer   save_v2r
   barrier	host_meshdefault_meshr   )r   r   r   r   r   merge_ops         V/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/dtensor/python/save_restore.pysharded_saver&   "   s    > zz#//#$ I
NN;.>HI DNN$h/
() (((M&H DNN$&:;	/I I s   CC$C!$C-z(experimental.dtensor.enable_save_as_bf16r   c                 V    | D ]$  }t        |t        j                        sd|_        & y)a  Allows float32 DVariables to be checkpointed and restored as bfloat16.

  The method only affects the DVariable part inside the model and leaves
  non-DTensor Variables/Tensors untouched.

  Args:
    variables: A list of tf.Variable to be enabled with bfloat16 save/restore.
      Only has effect on DTensor Variables as they go through d_variables with
      DTensor Specific logis.
  TN)
isinstancer   	DVariablesave_as_bf16)r   vs     r%   enable_save_as_bf16r,   S   s*      a!Z))*an    z'experimental.dtensor.name_based_restorecheckpoint_prefixname_tensor_dictc                 V   t        j                         st        d      |}t        |t        j
                        st	        j
                  |      }|j                         D ]_  \  }}	 t        j                  |      j                  j                         j                         dk7  rt        dj                  |            a t        j                  |g| j!                         z  t"        j$                  j'                  | j)                         d            }t        j                  t+        |j-                               g| j!                         z  t"        j$                  j'                  | j)                         d	            }t        j                  d
gt/        |      z  g| j!                         z  t"        j$                  j'                  | j)                         d	            }|j1                         D cg c]  }|j2                   }	}|j1                         D cg c]%  }t        j                  |      j5                         ' }
}t7        j8                  t        j:                               5  t=        j>                  ||||	|
|j1                         D cg c]  }|j@                   c}      }ddd       t	        j
                  tC        |j-                                     S # t        j                  $ r}t        d      |d}~ww xY wc c}w c c}w c c}w # 1 sw Y   lxY w)am  Restores from checkpoint_prefix to name based DTensors.

  It is required to have already-initialized DTensor variables that have same
  shape/dtype for the tensors being restored.

  Also, we currently only support a named based restore on a single mesh.

  Args:
    mesh: The single mesh that all Tensors would be restored to.
    checkpoint_prefix : The prefix of checkpoint to be restored.
    name_tensor_dict: A ordered dictionary of tensor_names to a DTensor. The
      DTensor shape/dtype must match the tensors being saved/restored for now.

  Returns:
    A dictionary of name to its restored DTensor value.
  z$name based restore must run eagerly.CPUz\Restoring a non CPU Tensor is not supported currently. Offending tensor name : {tensor_name})tensor_namez)Saving/Restoring tensor must be a DTensorNr   rank    )prefixr   r   input_shapesinput_layoutsdtypes)"r   executing_eagerly
ValueErrorr(   collectionsOrderedDictitemsr   fetch_layoutr   device_typeupperformatr   OpErrorpacknum_local_devices
layout_libLayout
replicatedr"   listkeyslenvaluesshape	to_stringr   r   r   r   d_tensor_restore_v2dtypezip)r   r.   r/   ordered_name_tensor_dictnamer   op_errorr   r   r8   r9   restored_cpu_tensorss               r%   name_based_restorerW   d   s   . 
	"	"	$
;
<<-	$k&=&=	>*667GH /446 EldFE			&	!	&	&	2	2	4	:	:	<	E**0&T&*BD 	D 
FE hhD2244""4>>#3!"<>
 $))+,-0F0F0HH""4>>#3!"<>, XX
tc*++,t/E/E/GG""4>>#3!"<> .F-L-L-NO6&,,O,O -335
 
v((*- 
 zz#//#$ *>> !)!#+C+J+J+LMM 
	 	 	
"
'
'
)+?@
 C  E
57<DEE" P N sC   (AK)8L*L2'LL,L)L<LLLL(z$experimental.dtensor.name_based_savec           
         t        j                         st        d      |}t        |t        j
                        st	        j
                  |      }t        j                  |g| j                         z  t        j                  j                  | j                         d            }t        j                  t        |j                               g| j                         z  t        j                  j                  | j                         d            }t        | ||dgt!        |      z  t        |j#                                      y)a7  Saves name based Tensor into a Checkpoint.

  The function prepares the input dictionary to the format of a `sharded_save`,
  so that it can take advantage of DTensor SPMD based distributed save.

  Same as restore, the function only supports saving on the single mesh.

  Args:
    mesh: The single mesh that all Tensors would be restored to.
    checkpoint_prefix : The prefix of checkpoint to be restored.
    name_tensor_dict: A ordered dictionary of tensor_names to a DTensor. The
      DTensor shape/dtype must match the tensors being saved/restored for now.
  z!name based save must run eagerly.r   r3   r5   r6   )r   r   r   r   N)r   r;   r<   r(   r=   r>   r   rE   rF   rG   rH   rI   r"   rJ   rK   r&   rL   rM   )r   r.   r/   rS   r   s        r%   name_based_saverY      s   ( 
	"	"	$
8
99-	$k&=&=	>*667GH hh 12T5K5K5MM)00;;#'>>#3!  <  => $))+,-0F0F0HH""4>>#3!"<>, 
#tc":;;+22457r-   )"__doc__r=   typingr   r   r   tensorflow.dtensor.pythonr   r   r   r	   rG   r
   tensorflow.python.eagerr   tensorflow.python.frameworkr   r   r   
tensor_libtensorflow.python.opsr   r   tf_variables tensorflow.python.util.tf_exportr   MeshstrTensorVariabler&   r,   rW   rY    r-   r%   <module>rh      s   D  $ $ ) 0 5 : / + 3 + < ( ; 6 .26-
//-sJ---.- S	:#4#445- DIz'8'889	-
 %
))<+@+@@AB- 7-` 5"=4(=(=#>  >  4<H
//HH U:$$l&;&;;<<>H =HV 1b9-7
//-7S*"3"334-7 U:$$l&;&;;<<>-7 :-7r-   