
    BVhX\              	       ^   d Z ddlZddlmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ defdZde
j4                  dej6                  defdZ edg       G d d             Zdej<                  de
j4                  deeee   f   fdZ  G d d      Z!y)z<Training helper that checkpoints models and creates session.    N)OptionalTuple)checkpoint_management)session)distribute_lib)errors)ops)
tf_logging)saver)	tf_exportreturnc                 T    | yt        | d      r| j                  S dt        |       z  S )zReturns object name if it has one, or a message otherwise.

  This is useful for names that apper in error messages.
  Args:
    obj: Object to get the name of.
  Returns:
    name, "None", or a "no name" message.
  Nonenamez<no name for %s>)hasattrr   type)objs    Z/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/training/session_manager.py_maybe_namer       s.     	[sF88OS	))    sessr   pathc                 x    t        j                  d      }|r| j                  |       |j                  | |       y)z@Restores checkpoint values and SavedModel initializers if found.saved_model_initializersN)r	   get_collectionrunrestore)r   r   r   saved_model_init_opss       r   :_restore_checkpoint_and_maybe_run_saved_model_initializersr   1   s6     ++,FGHH!"
 --dr   ztrain.SessionManager)v1c                      e Zd ZdZ	 	 	 	 	 	 	 ddej
                  dej
                  dej
                  dej                  ddf
d	Z	 	 	 	 	 	 dd
ede	j                  dededeej                  ef   f
dZ	 	 	 	 	 	 	 	 	 dd
edej
                  de	j                  dededej                  fdZ	 	 	 	 	 	 dd
ede	j                  dededeej                  ef   f
dZd ed      fd
edeej                     fdZdej                  fdZdej                  deeee   f   fdZdej                  deeee   f   fdZdej                  deeee   f   fdZy)SessionManagera  Training helper that restores from checkpoint and creates session.

  This class is a small wrapper that takes care of session creation and
  checkpoint recovery. It also provides functions that to facilitate
  coordination among multiple training threads or processes.

  * Checkpointing trained variables as the training progresses.
  * Initializing variables on startup, restoring them from the most recent
    checkpoint after a crash, or wait for checkpoints to become available.

  ### Usage:

  ```python
  with tf.Graph().as_default():
     ...add operations to the graph...
    # Create a SessionManager that will checkpoint the model in '/tmp/mydir'.
    sm = SessionManager()
    sess = sm.prepare_session(master, init_op, saver, checkpoint_dir)
    # Use the session to train the graph.
    while True:
      sess.run(<my_train_op>)
  ```

  `prepare_session()` initializes or restores a model. It requires `init_op`
  and `saver` as an argument.

  A second process could wait for the model to be ready by doing the following:

  ```python
  with tf.Graph().as_default():
     ...add operations to the graph...
    # Create a SessionManager that will wait for the model to become ready.
    sm = SessionManager()
    sess = sm.wait_for_session(master)
    # Use the session to train the graph.
    while True:
      sess.run(<my_train_op>)
  ```

  `wait_for_session()` waits for a model to be initialized by other processes.

  Nlocal_init_opready_opready_for_local_init_opgraphlocal_init_run_optionszdistribute_lib.RunOptionsc                     |t        j                         }|| _        || _        || _        || _        || _        d| _        || _        || _	        ||t        d|z        yy)a_  Creates a SessionManager.

    The `local_init_op` is an `Operation` that is run always after a new session
    was created. If `None`, this step is skipped.

    The `ready_op` is an `Operation` used to check if the model is ready.  The
    model is considered ready if that operation returns an empty 1D string
    tensor. If the operation returns a non empty 1D string tensor, the elements
    are concatenated and used to indicate to the user why the model is not
    ready.

    The `ready_for_local_init_op` is an `Operation` used to check if the model
    is ready to run local_init_op.  The model is considered ready if that
    operation returns an empty 1D string tensor. If the operation returns a non
    empty 1D string tensor, the elements are concatenated and used to indicate
    to the user why the model is not ready.

    If `ready_op` is `None`, the model is not checked for readiness.

    `recovery_wait_secs` is the number of seconds between checks that
    the model is ready.  It is used by processes to wait for a model to
    be initialized or restored.  Defaults to 30 seconds.

    Args:
      local_init_op: An `Operation` run immediately after session creation.
         Usually used to initialize tables and local variables.
      ready_op: An `Operation` to check if the model is initialized.
      ready_for_local_init_op: An `Operation` to check if the model is ready
         to run local_init_op.
      graph: The `Graph` that the model will use.
      recovery_wait_secs: Seconds between checks for the model to be ready.
      local_init_run_options: RunOptions to be passed to session.run when
        executing the local_init_op.
      local_init_feed_dict: Optional session feed dictionary to use when running
        the local_init_op.

    Raises:
      ValueError: If ready_for_local_init_op is not None but local_init_op is
        None
    NzgIf you pass a ready_for_local_init_op you must also pass a local_init_op , ready_for_local_init_op [%s])r	   get_default_graph_local_init_op	_ready_op_ready_for_local_init_op_graph_recovery_wait_secs_target_local_init_run_options_local_init_feed_dict
ValueError)selfr#   r$   r%   r&   recovery_wait_secsr'   local_init_feed_dicts           r   __init__zSessionManager.__init__w   s    f }##%e'DDN$;D!DK1DDL#9D !5D*}/D 8 // 0 0 0E*r   masterr   checkpoint_dircheckpoint_filename_with_pathr   c                    || _         t        j                         }|r0t        |j                  d      r|j                  j                          t        j                  | j                   | j                  |      }	|r|rt        d      |r|s|s|	dfS |rt        |	||       |	dfS d}
t        j                  |      }|r|j                  st|r^|
|k  rYt        j                  d       t!        j"                  | j$                         |
| j$                  z  }
t        j                  |      }n|	dfS |sg|j                  stt        |	||j                         |j'                  |j(                         |	dfS )af  Creates a `Session`, and tries to restore a checkpoint.


    Args:
      master: `String` representation of the TensorFlow master to use.
      saver: A `Saver` object used to restore a model.
      checkpoint_dir: Path to the checkpoint files. The latest checkpoint in the
        dir will be used to restore.
      checkpoint_filename_with_path: Full file name path to the checkpoint file.
      wait_for_checkpoint: Whether to wait for checkpoint to become available.
      max_wait_secs: Maximum time to wait for checkpoints to become available.
      config: Optional `ConfigProto` proto used to configure the session.

    Returns:
      A pair (sess, is_restored) where 'is_restored' is `True` if
      the session could be restored, `False` otherwise.

    Raises:
      ValueError: If both checkpoint_dir and checkpoint_filename_with_path are
        set.
    _experimental_initialize_systemr&   configzFCan not provide both checkpoint_dir and checkpoint_filename_with_path.FTr   z'Waiting for checkpoint to be available.)r/   r   get_strategyr   extendedr;   r   Sessionr-   r2   r   r   get_checkpoint_statemodel_checkpoint_pathlogginginfotimesleepr.   recover_last_checkpointsall_model_checkpoint_paths)r3   r7   r   r8   r9   wait_for_checkpointmax_wait_secsr=   strategyr   	wait_timeckpts               r   _restore_checkpointz"SessionManager._restore_checkpoint   s\   > DL
 **,HGH--=?779??4<<t{{6JD7 8 9 9 +H5[$@
464Z I 55nED$44	]!:>?

4++,T---	$99.IU{ $44 ?eT//1	""4#B#BC:r   init_opc           
         | j                  |||||||      \  }}|s:||
s| j                  t        d      ||j                  ||	       |
r |
|       | j	                  |      \  }}|st        dt        |      d|
d|      | j                  |      \  }}|s*t        dt        |      d|
d| j                  d|      |S )	a>	  Creates a `Session`. Makes sure the model is ready to be used.

    Creates a `Session` on 'master'. If a `saver` object is passed in, and
    `checkpoint_dir` points to a directory containing valid checkpoint
    files, then it will try to recover the model from checkpoint. If
    no checkpoint files are available, and `wait_for_checkpoint` is
    `True`, then the process would check every `recovery_wait_secs`,
    up to `max_wait_secs`, for recovery to succeed.

    If the model cannot be recovered successfully then it is initialized by
    running the `init_op` and calling `init_fn` if they are provided.
    The `local_init_op` is also run after init_op and init_fn, regardless of
    whether the model was recovered successfully, but only if
    `ready_for_local_init_op` passes.

    If the model is recovered from a checkpoint it is assumed that all
    global variables have been initialized, in particular neither `init_op`
    nor `init_fn` will be executed.

    It is an error if the model cannot be recovered and no `init_op`
    or `init_fn` or `local_init_op` are passed.

    Args:
      master: `String` representation of the TensorFlow master to use.
      init_op: Optional `Operation` used to initialize the model.
      saver: A `Saver` object used to restore a model.
      checkpoint_dir: Path to the checkpoint files. The latest checkpoint in the
        dir will be used to restore.
      checkpoint_filename_with_path: Full file name path to the checkpoint file.
      wait_for_checkpoint: Whether to wait for checkpoint to become available.
      max_wait_secs: Maximum time to wait for checkpoints to become available.
      config: Optional `ConfigProto` proto used to configure the session.
      init_feed_dict: Optional dictionary that maps `Tensor` objects to feed
        values.  This feed dictionary is passed to the session `run()` call when
        running the init op.
      init_fn: Optional callable used to initialize the model. Called after the
        optional `init_op` is called.  The callable must accept one argument,
        the session being initialized.

    Returns:
      A `Session` object that can be used to drive the model.

    Raises:
      RuntimeError: If the model cannot be initialized or recovered.
      ValueError: If both checkpoint_dir and checkpoint_filename_with_path are
        set.
    r8   r9   rI   rJ   r=   zMModel is not initialized and no init_op or init_fn or local_init_op was given)	feed_dictzCInit operations did not make model ready for local_init.  Init op: z, init fn: z	, error: z4Init operations did not make model ready.  Init op: z, local_init_op: )rN   r*   RuntimeErrorr   _try_run_local_init_opr   _model_ready)r3   r7   rO   r   r8   r9   rI   rJ   r=   init_feed_dictinit_fnr   is_loaded_from_checkpointlocal_init_successmsgis_readys                   r   prepare_sessionzSessionManager.prepare_session  s   z '+&>&>%&C/# '? '#D
# %	T-@-@-H @ A 	A		N3	"99$?3>w3G3:3689 9 %%d+MHc w$*=*=sDE E Kr   c           	      H   | j                  |||||||      \  }}	| j                  |      \  }
}|	s|dfS |xs |}|
st        j                  d||       |dfS | j	                  |      \  }}|st        j                  d||       |dfS t        j                  d|       ||	fS )a  Creates a `Session`, recovering if possible.

    Creates a new session on 'master'.  If the session is not initialized
    and can be recovered from a checkpoint, recover it.

    Args:
      master: `String` representation of the TensorFlow master to use.
      saver: A `Saver` object used to restore a model.
      checkpoint_dir: Path to the checkpoint files. The latest checkpoint in the
        dir will be used to restore.
      checkpoint_filename_with_path: Full file name path to the checkpoint file.
      wait_for_checkpoint: Whether to wait for checkpoint to become available.
      max_wait_secs: Maximum time to wait for checkpoints to become available.
      config: Optional `ConfigProto` proto used to configure the session.

    Returns:
      A pair (sess, initialized) where 'initialized' is `True` if
      the session could be recovered and initialized, `False` otherwise.

    Raises:
      ValueError: If both checkpoint_dir and checkpoint_filename_with_path are
        set.
    rQ   FzCRestoring model from %s did not make model ready for local init: %sz4Restoring model from %s did not make model ready: %szRestored model from %s)rN   rT   rC   rD   rU   )r3   r7   r   r8   r9   rI   rJ   r=   r   rX   rY   rZ   restoring_filer[   s                 r   recover_sessionzSessionManager.recover_sessiona  s    D '+&>&>%&C/# '? '#D
# #99$?$5[#D'DNll& 5[%%d+MHcllI!3(5[LL)>:***r   Infc                    || _         |t        d      }t        |      }	 t        j                  | j                   | j
                  |      }d}d}| j                  |      \  }}|r| j                  |      \  }	}|	r|S | j                  |       |j                         | j                  z
  }
|
dk  rt        j                  ddd|fz        t        j                  d||       t        j                   | j                         )aO  Creates a new `Session` and waits for model to be ready.

    Creates a new `Session` on 'master'.  Waits for the model to be
    initialized or recovered from a checkpoint.  It's expected that
    another thread or process will make the model ready, and that this
    is intended to be used by threads/processes that participate in a
    distributed training configuration where a different thread/process
    is responsible for initializing or recovering the model being trained.

    NB: The amount of time this method waits for the session is bounded
    by max_wait_secs. By default, this function will wait indefinitely.

    Args:
      master: `String` representation of the TensorFlow master to use.
      config: Optional ConfigProto proto used to configure the session.
      max_wait_secs: Maximum time to wait for the session to become available.

    Returns:
      A `Session`. May be None if the operation exceeds the timeout
      specified by config.operation_timeout_in_ms.

    Raises:
      tf.DeadlineExceededError: if the session is not available after
        max_wait_secs.
    Nr`   r<   r   z,Session was not ready after waiting %d secs.zGWaiting for model to be ready.  Ready_for_local_init_op:  %s, ready: %s)r/   float_CountDownTimerr   r@   r-   rT   rU   _safe_closesecs_remainingr.   r   DeadlineExceededErrorrC   rD   rE   rF   )r3   r7   r=   rJ   timerr   not_ready_msgnot_ready_local_msgrY   r[   remaining_ms_after_waits              r   wait_for_sessionzSessionManager.wait_for_session  s   8 DLElmM*E
__T\\VLdm 040K0K
1--	"&"3"3D"9-+
t 


 4#;#;
; 	 1	$**$:m=MMO 	O ll =&7 jj))*3 r   r   c                 D    	 |j                          y# t        $ r Y yw xY w)zCloses a session without raising an exception.

    Just like sess.close() but ignores exceptions.

    Args:
      sess: A `Session`.
    N)close	Exceptionr3   r   s     r   rd   zSessionManager._safe_close  s$    
jjl  	s    	c                 0    t        | j                  |d      S )a  Checks if the model is ready or not.

    Args:
      sess: A `Session`.

    Returns:
      A tuple (is_ready, msg), where is_ready is True if ready and False
      otherwise, and msg is `None` if the model is ready, a `String` with the
      reason why it is not ready otherwise.
    zModel not ready)_readyr+   ro   s     r   rU   zSessionManager._model_ready  s     $..$(9::r   c                 0    t        | j                  |d      S )a\  Checks if the model is ready to run local_init_op.

    Args:
      sess: A `Session`.

    Returns:
      A tuple (is_ready, msg), where is_ready is True if ready to run
      local_init_op and False otherwise, and msg is `None` if the model is
      ready to run local_init_op, a `String` with the reason why it is not ready
      otherwise.
    zModel not ready for local init)rq   r,   ro   s     r   _model_ready_for_local_initz*SessionManager._model_ready_for_local_init  s     $//24 4r   c                 
   | j                   w| j                  |      \  }}|r]t        j                  d       |j	                  | j                   | j
                  | j                         t        j                  d       yd|fS y)an  Tries to run _local_init_op, if not None, and is ready for local init.

    Args:
      sess: A `Session`.

    Returns:
      A tuple (is_successful, msg), where is_successful is True if
      _local_init_op is None, or we ran _local_init_op, and False otherwise;
      and msg is a `String` with the reason why the model was not ready to run
      local init.
    zRunning local_init_op.)rR   optionszDone running local_init_op.TNF)r*   rs   rC   rD   r   r1   r0   )r3   r   is_ready_for_local_initrZ   s       r   rT   z%SessionManager._try_run_local_init_op  s}     &%)%E%Ed%K"s	 -.$$0J0J55 	 	723czr   )NNNN   NN)NNNF   N)	NNNNFry   NNN)__name__
__module____qualname____doc__r	   	OperationGraphr6   str	saver_libSaverr   r   r@   boolrN   r\   r_   rb   r   rk   rd   rU   rs   rT    r   r   r"   r"   J   sL   )Z &* $/3<@A0]]A0 A0  #}}	A0
 YYA0 :A0L  $ +/GG __G 	G
 &)G W__d"#GX  $# +/\\ }}\ __	\
 \ &)\ \B  $ +/@+@+ __@+ 	@+
 &)@+ W__d"#@+F !%E%L;+;+ ;+zgoo $;w ;5x}9L3M ;4//4T8C= !4"//T8C= !r   r"   opc                    | y	 |j                  |       }|,|j                  t        j                  k(  s|j                  dk(  rydj                  |D cg c]  }|j                  d       c}      }dd|z   fS c c}w # t        j                  $ rF}dt        |      vr"t        j                  d	|t        |             |dt        |      fcY d}~S d}~ww xY w)
a  Checks if the model is ready or not, as determined by op.

  Args:
    op: An op, either _ready_op or _ready_for_local_init_op, which defines the
      readiness of the model.
    sess: A `Session`.
    msg: A message to log to warning if not ready

  Returns:
    A tuple (is_ready, msg), where is_ready is True if ready and False
    otherwise, and msg is `None` if the model is ready, a `String` with the
    reason why it is not ready otherwise.
  Nrv   r   z, zutf-8FzVariables not initialized: uninitializedz%s : error [%s])r   dtypenpint32sizejoindecoder   FailedPreconditionErrorr   rC   warning)r   r   rZ   ready_valueinon_initialized_varnameses          r   rq   rq   +  s      ZHHRLk 
!2!2bhh!>


a

 $(99(341QXXg4$6 36NNNN 5)) 	A	&)3A7CF]	s5   ?A? A? A:,A? :A? ?C;CCCc                   $    e Zd ZdZddgZd Zd Zy)rc   z.A timer that tracks a duration since creation._start_time_secs_duration_secsc                 D    t        j                          | _        || _        y )N)rE   r   r   )r3   duration_secss     r   r6   z_CountDownTimer.__init__Y  s     IIKD'Dr   c                 v    | j                   t        j                         | j                  z
  z
  }t        d|      S )Nr   )r   rE   r   max)r3   diffs     r   re   z_CountDownTimer.secs_remaining]  s/    $))+0E0E"EFDq$<r   N)rz   r{   r|   r}   	__slots__r6   re   r   r   r   rc   rc   T  s    6!#34)(r   rc   )"r}   rE   typingr   r   numpyr   tensorflow.python.checkpointr   tensorflow.python.clientr   tensorflow.python.distributer   tensorflow.python.frameworkr   r	   tensorflow.python.platformr
   rC   tensorflow.python.trainingr   r    tensorflow.python.util.tf_exportr   r   r   r@   r   r   r"   r~   r   rq   rc   r   r   r   <module>r      s    C  "  > , 7 . + < 9 6* *"
//"+//9<2 %&'] ] (]@&&$__&
4#&R r   