
    BVh                        d Z ddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! dZ"dZ# G d d      Z$ e!dg       G d de$             Z% G d de$      Z& e!dg       G d d ejN                               Z(d! Z) G d" d#ejN                        Z* e!d$g       G d% d&ejN                               Z+ e!d'g       G d( d)             Z, e!d*g       G d+ d,ejN                               Z- e!d-g       G d. d/ejN                               Z. e!d0g       G d1 d2e/             Z0 e!d3g       G d4 d5ejN                               Z1 e!d6g       G d7 d8ejN                               Z2 e!d9g       G d: d;ejN                               Z3 e!d<g       G d= d>ejN                               Z4 e!d?g       G d@ dAejN                               Z5 e!dBg       G dC dDejN                               Z6dE Z7y)Fa  Some common SessionRunHook classes.

Note that the symbols that are exported to v1 tf.train namespace are also
exported to v2 in tf.estimator namespace. See
https://github.com/tensorflow/estimator/blob/master/tensorflow_estimator/python/estimator/hooks/basic_session_run_hooks.py
    N)Summary)
config_pb2)
SessionLog)timeline)dtypes)errors)
meta_graph)ops)init_ops)variable_scope)gfile)
tf_logging)session_run_hook)training_util)SessionRunArgs)SummaryWriterCache)	tf_exporthookssteps_per_runc                   .    e Zd ZdZd Zd Zd Zd Zd Zy)
_HookTimerz`Base timer for determining when Hooks should trigger.

  Should not be instantiated directly.
  c                      y N selfs    b/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/training/basic_session_run_hooks.py__init__z_HookTimer.__init__7           c                      y)zResets the timer.Nr   r   s    r   resetz_HookTimer.reset:   s    r    c                     t         )z?Return true if the timer should trigger for the specified step.NotImplementedErrorr   steps     r   should_trigger_for_stepz"_HookTimer.should_trigger_for_step>       
r    c                     t         )a  Update the last triggered time and step number.

    Args:
      step: The current step.

    Returns:
      A pair `(elapsed_time, elapsed_steps)`, where `elapsed_time` is the number
      of seconds between the current trigger and the last one (a float), and
      `elapsed_steps` is the number of steps between the current trigger and
      the last one. Both values will be set to `None` on the first trigger.
    r$   r&   s     r   update_last_triggered_stepz%_HookTimer.update_last_triggered_stepB   s
     r    c                     t         )z@Returns the last triggered time step or None if never triggered.r$   r   s    r   last_triggered_stepz_HookTimer.last_triggered_stepP   r)   r    N)	__name__
__module____qualname____doc__r   r"   r(   r+   r-   r   r    r   r   r   1   s     
		r    r   ztrain.SecondOrStepTimer)v1c                   <     e Zd ZdZd fd	Zd Zd Zd Zd Z xZ	S )SecondOrStepTimera  Timer that triggers at most once every N seconds or once every N steps.

  This symbol is also exported to v2 in tf.estimator namespace. See
  https://github.com/tensorflow/estimator/blob/master/tensorflow_estimator/python/estimator/hooks/basic_session_run_hooks.py
  c                     | j                          || _        || _        | j                  | j                  t        d      | j                  | j                  t        d      t        t
        |           y )Nz4Either every_secs or every_steps should be provided.z0Can not provide both every_secs and every_steps.)r"   _every_secs_every_steps
ValueErrorsuperr4   r   )r   
every_secsevery_steps	__class__s      r   r   zSecondOrStepTimer.__init__]   sp    JJL!D#DD$5$5$=MNN$4+<+<+HIJJ	
T+-r    c                      d | _         d | _        y r   )_last_triggered_step_last_triggered_timer   s    r   r"   zSecondOrStepTimer.reseti   s     $D $Dr    c                    | j                   y| j                   |k(  ry| j                  /t        j                         | j                  | j                  z   k\  ry| j                  || j                   | j                  z   k\  ryy)aq  Return true if the timer should trigger for the specified step.

    Args:
      step: Training step to trigger on.

    Returns:
      True if the difference between the current time and the time of the last
      trigger exceeds `every_secs`, or if the difference between the current
      step and the last triggered step exceeds `every_steps`. False otherwise.
    TF)r>   r6   timer?   r7   r&   s     r   r(   z)SecondOrStepTimer.should_trigger_for_stepm   s}       (  D(#	11D4D4DD	D$	**T->->>	>r    c                     t        j                          }| j                  d }d }n|| j                  z
  }|| j                  z
  }|| _        || _        ||fS r   )rA   r?   r>   )r   r'   current_timeelapsed_secselapsed_stepss        r   r+   z,SecondOrStepTimer.update_last_triggered_step   s^    99;L  (lm!D$=$==lT666m ,D $D-((r    c                     | j                   S r   )r>   r   s    r   r-   z%SecondOrStepTimer.last_triggered_step   s    $$$r    NN)
r.   r/   r0   r1   r   r"   r(   r+   r-   __classcell__)r<   s   @r   r4   r4   U   s!    
.%6)%r    r4   c                   "    e Zd ZdZd Zd Zd Zy)NeverTriggerTimerzTimer that never triggers.c                     |}y)NFr   r   r'   _s      r   r(   z)NeverTriggerTimer.should_trigger_for_step   s
    Ar    c                     |}y)NrG   r   rL   s      r   r+   z,NeverTriggerTimer.update_last_triggered_step   s
    Ar    c                      y r   r   r   s    r   r-   z%NeverTriggerTimer.last_triggered_step   s    r    N)r.   r/   r0   r1   r(   r+   r-   r   r    r   rJ   rJ      s    "r    rJ   ztrain.LoggingTensorHookc                   >    e Zd ZdZ	 	 	 	 d	dZd Zd Zd Zd Zd Z	y)
LoggingTensorHooka  Prints the given tensors every N local steps, every N seconds, or at end.

  The tensors will be printed to the log, with `INFO` severity. If you are not
  seeing the logs, you might want to add the following line after your imports:

  ```python
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
  ```

  Note that if `at_end` is True, `tensors` should not include any tensor
  whose evaluation produces a side effect such as consuming additional inputs.

  @compatibility(TF2)
  Please check this [notebook][notebook] on how to migrate the API to TF2.

  [notebook]:https://github.com/tensorflow/docs/blob/master/site/en/guide/migrate/logging_stop_hook.ipynb

  @end_compatibility

  Nc                 r   |xr
 |du xr |du }|s|du |du k(  rt        d      ||dk  rt        d|z        t        |t              s|| _        |D ci c]  }|| }}nt	        |j                               | _        || _        || _        |r
t               nt        ||      | _
        || _        yc c}w )a  Initializes a `LoggingTensorHook`.

    Args:
      tensors: `dict` that maps string-valued tags to tensors/tensor names, or
        `iterable` of tensors/tensor names.
      every_n_iter: `int`, print the values of `tensors` once every N local
        steps taken on the current worker.
      every_n_secs: `int` or `float`, print the values of `tensors` once every N
        seconds. Exactly one of `every_n_iter` and `every_n_secs` should be
        provided.
      at_end: `bool` specifying whether to print the values of `tensors` at the
        end of the run.
      formatter: function, takes dict of `tag`->`Tensor` and returns a string.
        If `None` uses default printing all tensors.

    Raises:
      ValueError: if `every_n_iter` is non-positive.
    NzSeither at_end and/or exactly one of every_n_iter and every_n_secs must be provided.r   zinvalid every_n_iter=%s.r:   r;   )r8   
isinstancedict
_tag_ordersortedkeys_tensors
_formatterrJ   r4   _timer_log_at_end)r   tensorsevery_n_iterevery_n_secsat_end	formatteronly_log_at_enditems           r   r   zLoggingTensorHook.__init__   s    2 	DLD(D|t/C 		<4#78  LA$51L@AAgt$do(/0t0g0w||~.doDMDO.4E#5? 	K D 1s   
B4c                     | j                   j                          d| _        | j                  j	                         D ci c]  \  }}|t        |       c}}| _        y c c}}w )Nr   )r[   r"   _iter_countrY   items_as_graph_element_current_tensors)r   tagtensors      r   beginzLoggingTensorHook.begin   sX    KKD "]]002S& 	v&&D s   Ac                     | j                   j                  | j                        | _        | j                  rt	        | j
                        S y r   )r[   r(   re   _should_triggerr   rh   r   run_contexts     r   
before_runzLoggingTensorHook.before_run   s<    ;;>>t?O?OPDD1122r    c                 (   t        j                         }t        j                  d       | j                  j	                  | j
                        \  }}| j                  r%t        j                  | j                  |             nzg }| j                  D ]  }|j                  |d||           |'t        j                  ddj                  |      |       n%t        j                  ddj                  |             t        j                  di | y )NT)suppressz = z%s (%.3f sec)z, z%sr   )npget_printoptionsset_printoptionsr[   r+   re   rZ   logginginforV   appendjoin)r   tensor_valuesoriginalrD   rM   statsri   s          r   _log_tensorszLoggingTensorHook._log_tensors   s    ""$H&kk<<T=M=MNOL!ll4??=12e <##}S'9:;<		!_dii&6ET499U+,#(#r    c                     |}| j                   r| j                  |j                         | xj                  dz  c_        y N   )rm   r}   resultsre   )r   ro   
run_valuesrM   s       r   	after_runzLoggingTensorHook.after_run  s5    A

**+r    c                 v    | j                   r-|j                  | j                        }| j                  |       y y r   )r\   runrh   r}   )r   sessionvaluess      r   endzLoggingTensorHook.end  s2    {{4001f
 r    )NNFN)
r.   r/   r0   r1   r   rk   rp   r}   r   r   r   r    r   rQ   rQ      s4    . ! +Z$  r    rQ   c            
      
   t        j                         } dj                  t        t              }| j                  |      }t        |      dk(  r|d   S t        |      dkD  rt        d      t        j                  t        t        j                        5  t        j                  t        t        j                         g t        j                  d|t         j                  j                   gd      cd	d	d	       S # 1 sw Y   y	xY w)
aP  Gets or creates the steps_per_run variable.

  In Estimator, the user provided computation, the model_fn, is wrapped
  inside a tf.while_loop for peak performance. The iterations of the loop are
  specified by this variable, which adjusts its value on the CPU after each
  device program execution and before the next execution.

  The purpose of using a variable, rather than a constant, is to allow
  Estimator adapt the device training iterations according to the final steps
  specified by users. For example, if the user sets the steps_per_run as
  4 and steps as 10 in Estimator.train(), the steps_per_run
  variable will have the following value before each training run.

      - 1-st execution: steps_per_run = 4
      - 2-nd execution: steps_per_run = 4
      - 3-rd execution: steps_per_run = 2

  As model_fn increases the global step once per train_op invocation, the global
  step is 10 after all executions, matching the steps=10 inputs passed in by
  users.

  Returns:
    A TF non-trainable resource variable.

  Raises:
    RuntimeError: If multi steps_per_run variables were found.
  z{}_{}r   r   z)Multiple steps_per_run_var in collection.)reuseFT)initializershapedtype	trainablecollectionsuse_resourceN)r
   get_default_graphformat_HOOKS_STEPS_PER_RUN_VARget_collectionlenRuntimeErrorr   
AUTO_REUSEget_variabler   ones_initializerr   int32	GraphKeysLOCAL_VARIABLES)graphcollection_namesteps_per_run_varss      r   $get_or_create_steps_per_run_variabler     s    8 


!%NN6+=>/++O<	!a  
"
B
CC$$V>3L3LM &&--/ll$cmm&C&CD  s   AC99Dc                   0    e Zd ZdZddZd Zd Zd Zd Zy)	_MultiStepStopAtStepHookz,Hook that requests stop at a specified step.Nc                     ||t        d      ||t        d      ||dk  rt        d      || _        || _        || _        y)a  Initializes a `MultiStepStopAtStepHook`.

    This hook requests stop after either a number of steps have been
    executed or a last step has been reached. Only one of the two options can be
    specified.

    if `num_steps` is specified, it indicates the number of steps to execute
    after `begin()` is called. If instead `last_step` is specified, it
    indicates the last step we want to execute, as passed to the `after_run()`
    call.

    In Estimator, the user provided computation, the model_fn, is wrapped
    inside a tf.while_loop for peak performance. The steps_per_run variable
    determines the number of iterations of the loop before returning to the CPU.

    Args:
      num_steps: Number of steps to execute.
      last_step: Step after which to stop.
      steps_per_run: Number of steps executed per run call.

    Raises:
      ValueError: If one of the arguments is invalid.
    N0One of num_steps or last_step must be specified.4Only one of num_steps or last_step can be specified.r   z&steps_per_run should be greater than 0)r8   
_num_steps
_last_step_steps_per_run_initial_value)r   	num_steps	last_stepr   s       r   r   z!_MultiStepStopAtStepHook.__init__K  sa    0 Y.IJJ!6MNN 1?@@DODO(5D%r    c                     t        j                         | _        | j                  t        d      t	               | _        y Nz4Global step should be created to use StopAtStepHook.)r   get_global_step_global_step_tensorr   r   _steps_per_run_variabler   s    r   rk   z_MultiStepStopAtStepHook.beginm  s6    ,<<>D'OPP#G#ID r    c                     t        | j                  |z
  | j                        }| j                  j	                  ||       y )N)r   )minr   r   r   load)r   global_stepr   stepss       r   _update_steps_per_run_variablez7_MultiStepStopAtStepHook._update_steps_per_run_variables  s9    +-113E  %%eW%=r    c                     |j                  | j                        }| j                  || j                  z   | _        | j	                  ||       y r   )r   r   r   r   r   r   r   coordr   s       r   after_create_sessionz-_MultiStepStopAtStepHook.after_create_sessionx  s@    ++d667K#doo5do''W=r    c                     |j                   j                  | j                        }|| j                  k\  r|j	                          y | j                  ||j                          y r   )r   r   r   r   request_stopr   )r   ro   r   r   s       r   r   z"_MultiStepStopAtStepHook.after_run~  sN     %%))$*B*BCKdoo% 
))+{7J7JKr    )NNr   )	r.   r/   r0   r1   r   rk   r   r   r   r   r    r   r   r   H  s!    4 6DJ>
>Lr    r   ztrain.StopAtStepHookc                   0    e Zd ZdZddZd Zd Zd Zd Zy)	StopAtStepHooka  Hook that requests stop at a specified step.

  @compatibility(TF2)
  Please check this [notebook][notebook] on how to migrate the API to TF2.

  [notebook]:https://github.com/tensorflow/docs/blob/master/site/en/guide/migrate/logging_stop_hook.ipynb

  @end_compatibility
  Nc                 \    ||t        d      ||t        d      || _        || _        y)a^  Initializes a `StopAtStepHook`.

    This hook requests stop after either a number of steps have been
    executed or a last step has been reached. Only one of the two options can be
    specified.

    if `num_steps` is specified, it indicates the number of steps to execute
    after `begin()` is called. If instead `last_step` is specified, it
    indicates the last step we want to execute, as passed to the `after_run()`
    call.

    Args:
      num_steps: Number of steps to execute.
      last_step: Step after which to stop.

    Raises:
      ValueError: If one of the arguments is invalid.
    Nr   r   )r8   r   r   )r   r   r   s      r   r   zStopAtStepHook.__init__  s@    & Y.IJJ!6MNNDODOr    c                 d    t        j                         | _        | j                  t        d      y r   )r   _get_or_create_global_step_readr   r   r   s    r   rk   zStopAtStepHook.begin  s0    ,LLND'OPP (r    c                 |    | j                   0|j                  | j                        }|| j                  z   | _         y y r   )r   r   r   r   r   s       r   r   z#StopAtStepHook.after_create_session  s5    KK 8 89k#doo5do r    c                 ,    t        | j                        S r   r   r   rn   s     r   rp   zStopAtStepHook.before_run      $2233r    c                     |j                   dz   }|| j                  k\  rF|j                  j                  | j                        }|| j                  k\  r|j                          y y y r   )r   r   r   r   r   r   )r   ro   r   r   r'   s        r   r   zStopAtStepHook.after_run  s^    $$q(Kdoo%   $$T%=%=>d		   " 
! &r    rG   )	r.   r/   r0   r1   r   rk   r   rp   r   r   r    r   r   r     s!     4Q
6
4
#r    r   ztrain.CheckpointSaverListenerc                   (    e Zd ZdZd Zd Zd Zd Zy)CheckpointSaverListenera  Interface for listeners that take action before or after checkpoint save.

  `CheckpointSaverListener` triggers only in steps when `CheckpointSaverHook` is
  triggered, and provides callbacks at the following points:
   - before using the session
   - before each call to `Saver.save()`
   - after each call to `Saver.save()`
   - at the end of session

  To use a listener, implement a class and pass the listener to a
  `CheckpointSaverHook`, as in this example:

  ```python
  class ExampleCheckpointSaverListener(CheckpointSaverListener):
    def begin(self):
      # You can add ops to the graph here.
      print('Starting the session.')
      self.your_tensor = ...

    def before_save(self, session, global_step_value):
      print('About to write a checkpoint')

    def after_save(self, session, global_step_value):
      print('Done writing checkpoint.')
      if decided_to_stop_training():
        return True

    def end(self, session, global_step_value):
      print('Done with the session.')

  ...
  listener = ExampleCheckpointSaverListener()
  saver_hook = tf.estimator.CheckpointSaverHook(
      checkpoint_dir, listeners=[listener])
  with
  tf.compat.v1.train.MonitoredTrainingSession(chief_only_hooks=[saver_hook]):
    ...
  ```

  A `CheckpointSaverListener` may simply take some action after every
  checkpoint save. It is also possible for the listener to use its own schedule
  to act less frequently, e.g. based on global_step_value. In this case,
  implementors should implement the `end()` method to handle actions related to
  the last checkpoint save. But the listener should not act twice if
  `after_save()` already handled this last checkpoint save.

  A `CheckpointSaverListener` can request training to be stopped, by returning
  True in `after_save`. Please note that, in replicated distributed training
  setting, only `chief` should use this behavior. Otherwise each worker will do
  their own evaluation, which may be wasteful of resources.
  c                      y r   r   r   s    r   rk   zCheckpointSaverListener.begin  r   r    c                      y r   r   r   r   global_step_values      r   before_savez#CheckpointSaverListener.before_save  r   r    c                      y r   r   r   s      r   
after_savez"CheckpointSaverListener.after_save  r   r    c                      y r   r   r   s      r   r   zCheckpointSaverListener.end  r   r    N)r.   r/   r0   r1   rk   r   r   r   r   r    r   r   r     s    2h				r    r   ztrain.CheckpointSaverHookc                   V    e Zd ZdZ	 	 	 	 	 	 	 ddZd Zd Zd Zd Zd Z	d	 Z
d
 Zd Zy)CheckpointSaverHookz+Saves checkpoints every N steps or seconds.Nc	                    t        j                  d       ||t        d      || _        || _        t
        j                  j                  ||      | _        || _	        t        ||      | _        |xs g | _        d| _        || _        y)a  Initializes a `CheckpointSaverHook`.

    Args:
      checkpoint_dir: `str`, base directory for the checkpoint files.
      save_secs: `int`, save every N secs.
      save_steps: `int`, save every N steps.
      saver: `Saver` object, used for saving.
      checkpoint_basename: `str`, base name for the checkpoint files.
      scaffold: `Scaffold`, use to get saver object.
      listeners: List of `CheckpointSaverListener` subclass instances. Used for
        callbacks that run immediately before or after this hook saves the
        checkpoint.
      save_graph_def: Whether to save the GraphDef and MetaGraphDef to
        `checkpoint_dir`. The GraphDef is saved after the session is created as
        `graph.pbtxt`. MetaGraphDefs are saved out for every checkpoint as
        `model.ckpt-*.meta`.

    Raises:
      ValueError: One of `save_steps` or `save_secs` should be set.
      ValueError: At most one of `saver` or `scaffold` should be set.
    zCreate CheckpointSaverHook.Nz+You cannot provide both saver and scaffold.rS   i@B )rv   rw   r8   _saver_checkpoint_dirospathry   
_save_path	_scaffoldr4   r[   
_listeners_steps_per_run_save_graph_def)	r   checkpoint_dir	save_secs
save_stepssavercheckpoint_basenamescaffold	listenerssave_graph_defs	            r   r   zCheckpointSaverHook.__init__  s    < LL./X1DEEDK)Dggll>3FGDODN#*6DKo2DO "D)Dr    c                     || _         y r   r   r   r   s     r   _set_steps_per_runz&CheckpointSaverHook._set_steps_per_run=  
    'Dr    c                     t        j                  | j                        | _        t	        j
                         | _        | j                  t        d      | j                  D ]  }|j                           y )Nz9Global step should be created to use CheckpointSaverHook.)
r   getr   _summary_writerr   r   r   r   r   rk   )r   ls     r   rk   zCheckpointSaverHook.begin@  sf    -11$2F2FGD,LLND'
EG G__ ggir    c                 l   |j                  | j                        }| j                  rCt        j                  t        j                         j                  d      | j                  d       | j                         r| j                         j                  nd }t        j                         }t        j                  |j                  d      |      }| j                  j                  |       | j                  j                  |       | j!                  ||       | j"                  j%                  |       y )NT)
add_shapeszgraph.pbtxt)	graph_def	saver_def)r   r   r   r   write_graphr
   r   as_graph_defr   
_get_saverr   r	   create_meta_graph_defr   	add_graphadd_meta_graph_saver[   r+   )r   r   r   r   r   r   meta_graph_defs          r   r   z(CheckpointSaverHook.after_create_sessionI  s    ++d667K 



!
.
.$
.
?


/ 04/@!++dI!!#E55$$$5LN""5)''7JJw$KK**;7r    c                 ,    t        | j                        S r   r   rn   s     r   rp   zCheckpointSaverHook.before_run\  r   r    c                    |j                   }| j                  j                  || j                  z         r|j                  j                  | j                        }| j                  j                  |      rI| j                  j                  |       | j                  |j                  |      r|j                          y y y y r   )
r   r[   r(   r   r   r   r   r+   r   r   r   ro   r   stale_global_stepr   s        r   r   zCheckpointSaverHook.after_run_  s    "**{{**+<+/+>+>,? @  ''++D,D,DEk		,	,[	9..{;::k));7

"
"
$ 8 
:	@r    c                     |j                  | j                        }|| j                  j                         k7  r| j	                  ||       | j
                  D ]  }|j                  ||        y r   )r   r   r[   r-   r   r   r   )r   r   r   r   s       r   r   zCheckpointSaverHook.endj  sZ    D445IDKK3355
jj)$__  eeGY r    c                 j   t        j                  d|       | j                  D ]  }|j                  ||        t        j                  d|| j                         | j                         j                  || j                  || j                         | j                  j                  t        t        j                  | j                        |       t        j                  d|       d}| j                  D ];  }|j                  ||      st        j                  dj                  |             d}= |S )	z1Saves the latest checkpoint, returns should_stop.z;Calling checkpoint listeners before saving checkpoint %d...z"Saving checkpoints for %d into %s.)r   write_meta_graph)statuscheckpoint_pathz:Calling checkpoint listeners after saving checkpoint %d...FzJA CheckpointSaverListener requested that training be stopped. listener: {}T)rv   rw   r   r   r   r   saver   r   add_session_logr   
CHECKPOINTr   r   )r   r   r'   r   should_stops        r   r   zCheckpointSaverHook._saveq  s   LLN__ #mmGT"# LL5tT__MOO7DOO,0,@,@  B(((($//	K LLMK__ 	
gt	$!6!9	&  r    c                 z   | j                   | j                   S | j                  | j                  j                  S t        j                  j
                  }t        j                  |      }|st        dj                  |            t        |      dkD  rt        dj                  |            |d   | _         |d   S )Nz_No items in collection {}. Please add a saver to the collection or provide a saver or scaffold.r   zgMore than one item in collection {}. Please indicate which one to use by passing it to the constructor.r   )
r   r   r   r
   r   SAVERSr   r   r   r   )r   collection_keysaverss      r   r   zCheckpointSaverHook._get_saver  s    {{[[		#^^!!! ]]))N/F,,2F>,BD D 
VqO6.!# #
 )DK!9r    )NNNz
model.ckptNNT)r.   r/   r0   r1   r   r   rk   r   rp   r   r   r   r   r   r    r   r   r     sJ    3 #/",*\(8&4	% 2r    r   ztrain.StepCounterHookc                   >    e Zd ZdZ	 	 	 	 d	dZd Zd Zd Zd Zd Z	y)
StepCounterHookz"Hook that counts steps per second.Nc                     |d u |d u k(  rt        d      t        ||      | _        || _        || _        d | _        d| _        y )NzAexactly one of every_n_steps and every_n_secs should be provided.)r;   r:   r   )r8   r4   r[   r   _output_dir_last_global_stepr   )r   every_n_stepsr_   
output_dirsummary_writers        r   r   zStepCounterHook.__init__  s[     	<4#78
MO O#!l<DK *D!D!DDr    c                     || _         y r   r   r   s     r   r   z"StepCounterHook._set_steps_per_run  r   r    c                 <   | j                   0| j                  r$t        j                  | j                        | _         t	        j
                         | _        | j                  t        d      t	        j                         j                  j                  dz   | _        y )Nz5Global step should be created to use StepCounterHook.z/sec)r   r  r   r   r   r   r   r   r   opname_summary_tagr   s    r   rk   zStepCounterHook.begin  s    #(8(8/33D4D4DEd,LLND'
AC C%557::??&HDr    c                 ,    t        | j                        S r   r   rn   s     r   rp   zStepCounterHook.before_run  r   r    c                     ||z  }| j                   Ht        t        j                  | j                  |      g      }| j                   j	                  ||       t        j                  d| j                  |       y )N)ri   simple_value)valuez%s: %g)r   r   Valuer  add_summaryrv   rw   )r   rE   elapsed_timer   steps_per_secsummarys         r   _log_and_recordzStepCounterHook._log_and_record  si    !L0M'
--D--M
J 	g &&w<LL4,,m<r    c                    |}|j                   }| j                  j                  || j                  z         rs|j                  j                  | j                        }| j                  j                  |      r3| j                  j                  |      \  }}|| j                  |||       || j                  k(  r1t        j                  t        j                  dd|| j                         || _	        y )Na  It seems that global step (tf.train.get_global_step) has not been increased. Current value (could be stable): %s vs previous value: %s. You could increase the global step by passing tf.train.get_global_step() to Optimizer.apply_gradients or Optimizer.minimize.   )r   r[   r(   r   r   r   r   r+   r!  r  rv   log_first_nWARN)r   ro   r   rM   r   r   r  rE   s           r   r   zStepCounterHook.after_run  s    A"**{{**+<+/+>+>,? @  ''++D,D,DEk		,	,[	9&*kk&L&L'#m#


}lK
H D222 
,,  "#$5t7M7MO /Dr    )d   NNN)
r.   r/   r0   r1   r   r   rk   rp   r!  r   r   r    r   r  r    s2    * ! "	"(I4= /r    r  z train.NanLossDuringTrainingErrorc                       e Zd Zd Zy)NanLossDuringTrainingErrorc                      y)NzNaN loss during training.r   r   s    r   __str__z"NanLossDuringTrainingError.__str__  s    &r    N)r.   r/   r0   r*  r   r    r   r(  r(    s    'r    r(  ztrain.NanTensorHookc                   $    e Zd ZdZddZd Zd Zy)NanTensorHookzwMonitors the loss tensor and stops training if loss is NaN.

  Can either fail with exception or just stop training.
  c                      || _         || _        y)zInitializes a `NanTensorHook`.

    Args:
      loss_tensor: `Tensor`, the loss tensor.
      fail_on_nan_loss: `bool`, whether to raise exception when loss is NaN.
    N)_loss_tensor_fail_on_nan_loss)r   loss_tensorfail_on_nan_losss      r   r   zNanTensorHook.__init__  s     $D-Dr    c                 ,    t        | j                        S r   )r   r.  rn   s     r   rp   zNanTensorHook.before_run	  s    $++,,r    c                     t        j                  |j                        rOd}| j                  rt	        j
                  |       t        t	        j                  |       |j                          y y )NzModel diverged with loss = NaN.)	rs   isnanr   r/  rv   errorr(  warningr   )r   ro   r   failure_messages       r   r   zNanTensorHook.after_run  sQ    	xx
""#9o			o&(((  " $r    N)T)r.   r/   r0   r1   r   rp   r   r   r    r   r,  r,    s    
.-	#r    r,  ztrain.SummarySaverHookc                   D    e Zd ZdZ	 	 	 	 	 	 d	dZd Zd Zd Zd
dZd Z	y)SummarySaverHookzSaves summaries every N steps.Nc                     ||||t        d      || _        || _        || _        || _        t        ||      | _        y)a  Initializes a `SummarySaverHook`.

    Args:
      save_steps: `int`, save summaries every N steps. Exactly one of
        `save_secs` and `save_steps` should be set.
      save_secs: `int`, save summaries every N seconds.
      output_dir: `string`, the directory to save the summaries to. Only used if
        no `summary_writer` is supplied.
      summary_writer: `SummaryWriter`. If `None` and an `output_dir` was passed,
        one will be created accordingly.
      scaffold: `Scaffold` to get summary_op if it's not provided.
      summary_op: `Tensor` of type `string` containing the serialized `Summary`
        protocol buffer or a list of `Tensor`. They are most likely an output by
        TF summary methods like `tf.compat.v1.summary.scalar` or
        `tf.compat.v1.summary.merge_all`. It can be passed in as one tensor; if
        more than one, they must be passed in as a list.

    Raises:
      ValueError: Exactly one of scaffold or summary_op should be set.
    Nz7Exactly one of scaffold or summary_op must be provided.rS   )r8   _summary_opr   r  r   r4   r[   )r   r   r   r  r  r   
summary_ops          r   r   zSummarySaverHook.__init__  s\    6 
	j0		*"8
CE E!D)D!DDN#*6DKr    c                     | j                   0| j                  r$t        j                  | j                        | _         d | _        t        j                         | _        | j                  t        d      y )Nz6Global step should be created to use SummarySaverHook.)	r   r  r   r   
_next_stepr   r   r   r   r   s    r   rk   zSummarySaverHook.beginC  sj    #(8(8/33D4D4DEdDO,LLND'
BD D (r    c                    | j                   d u xs% | j                  j                  | j                         | _        d| j                  i}| j                  r#| j                         | j                         |d<   t        |      S )Nr   r   )r>  r[   r(   _request_summaryr   _get_summary_opr   )r   ro   requestss      r   rp   zSummarySaverHook.before_runL  sy    4 	=++DOO< 	 t778H					+"224(##r    c                 "   |}| j                   sy |j                  d   }|dz   }| j                  | j                  r%|j                  j                  | j                        }| j                  4| j                   j                  t        t        j                        |       | j                  rY| j                  j                  |       d|j                  v r0|j                  d   D ]  }| j                   j                  ||         |dz   | _        y )Nr   r   )r  r   )r   r   r>  r@  r   r   r   r  r   STARTr[   r+   r  )r   ro   r   rM   r   r   r   s          r   r   zSummarySaverHook.after_runW  s    A"**=9#a'K$"7"7''++D,D,DEk
**
J,,
-{< 
kk,,[9	j((	(!)))4 	AG



*
*7K
@	A "AoDOr    c                 R    | j                   r| j                   j                          y y r   )r   flush)r   r   s     r   r   zSummarySaverHook.endm  s"    
  " r    c                     d}| j                   | j                   }n,| j                  j                  | j                  j                  }|yt        |t              s|gS |S )zFetches the summary op either from self._summary_op or self._scaffold.

    Returns:
      Returns a list of summary `Tensor`.
    N)r;  r   r<  rT   list)r   r<  s     r   rA  z SummarySaverHook._get_summary_opq  s`     J###j		"	"	.>>,,jj$'\r    )NNNNNNr   )
r.   r/   r0   r1   r   rk   rp   r   r   rA  r   r    r   r9  r9    s9    & "$6ND	$&,#r    r9  ztrain.GlobalStepWaiterHookc                   "    e Zd ZdZd Zd Zd Zy)GlobalStepWaiterHookaD  Delays execution until global step reaches `wait_until_step`.

  This hook delays execution until global step reaches to `wait_until_step`. It
  is used to gradually start workers in distributed settings. One example usage
  would be setting `wait_until_step=int(K*log(task_id+1))` assuming that
  task_id=0 is the chief.
  c                     || _         y)zInitializes a `GlobalStepWaiterHook`.

    Args:
      wait_until_step: an `int` shows until which global step should we wait.
    N)_wait_until_step)r   wait_until_steps     r   r   zGlobalStepWaiterHook.__init__  s     ,Dr    c                 r    d| _         t        j                         | _        | j                  t	        d      y )NFz;Global step should be created to use _GlobalStepWaiterHook.)_worker_is_startedr   r   r   r   r   s    r   rk   zGlobalStepWaiterHook.begin  s>    #D,LLND'
GI I (r    c                    | j                   ry | j                  dk  rd| _         y t        j                  d| j                         d}	 |j                  j                  | j                        }|| j                  k\  rd| _         y ||z
  dkD  r#t        j                  d| j                  |       |}t        j                  d       })Nr   Tz4Waiting for global step %d before starting training.i  zHWaiting for global step %d before starting training. Current step is %d.g      ?)	rO  rL  rv   rw   r   r   r   rA   sleep)r   ro   last_logged_stepcurrent_steps       r   rp   zGlobalStepWaiterHook.before_run  s    ! $dLLG&&(
 ((,,T-E-EFl	..	."&	(	(4	/"#'#8#8,	H (
jjo r    N)r.   r/   r0   r1   r   rk   rp   r   r    r   rJ  rJ    s    ,Ir    rJ  ztrain.FinalOpsHookc                   .    e Zd ZdZddZed        Zd Zy)FinalOpsHookz9A hook which evaluates `Tensors` at the end of a session.Nc                 .    || _         || _        d| _        y)a  Initializes `FinalOpHook` with ops to run at the end of the session.

    Args:
      final_ops: A single `Tensor`, a list of `Tensors` or a dictionary of names
        to `Tensors`.
      final_ops_feed_dict: A feed dictionary to use when running
        `final_ops_dict`.
    N)
_final_ops_final_ops_feed_dict_final_ops_values)r   	final_opsfinal_ops_feed_dicts      r   r   zFinalOpsHook.__init__  s      DO 3D!Dr    c                     | j                   S r   )rY  r   s    r   final_ops_valueszFinalOpsHook.final_ops_values  s    !!!r    c                     | j                   .	 |j                  | j                   | j                        | _        y y # t        j
                  t        f$ r}t        j                  d       |d }~ww xY w)N)	feed_dicta  An OutOfRangeError or StopIteration exception is raised by the code in FinalOpsHook. This typically means the Ops running by the FinalOpsHook have a dependency back to some input source, which should not happen. For example, for metrics in tf.estimator.Estimator, all metrics functions return two Ops: `value_op` and  `update_op`. Estimator.evaluate calls the `update_op` for each batch of the data in input source and, once it is exhausted, it call the `value_op` to get the metric values. The `value_op` here should have dependency back to variables reading only, rather than reading another batch from input. Otherwise, the `value_op`, executed by `FinalOpsHook`, triggers another data reading, which ends OutOfRangeError/StopIteration. Please fix that.)	rW  r   rX  rY  r   OutOfRangeErrorStopIterationrv   r6  )r   r   es      r   r   zFinalOpsHook.end  sq    "!(OOt'@'@ "- "B # $$m4 	  s   ,< A1A,,A1r   )r.   r/   r0   r1   r   propertyr]  r   r   r    r   rU  rU    s#    A" " "r    rU  ztrain.FeedFnHookc                       e Zd ZdZd Zd Zy)
FeedFnHookz4Runs `feed_fn` and sets the `feed_dict` accordingly.c                     || _         y)zInitializes a `FeedFnHook`.

    Args:
      feed_fn: function that takes no arguments and returns `dict` of `Tensor`
        to feed.
    N)feed_fn)r   rg  s     r   r   zFeedFnHook.__init__  s     DLr    c                 L    t        j                  d | j                               S )N)fetchesr_  )r   r   rg  rn   s     r   rp   zFeedFnHook.before_run  s     **0 0r    N)r.   r/   r0   r1   r   rp   r   r    r   re  re    s    <0r    re  ztrain.ProfilerHookc                   :    e Zd ZdZ	 	 	 	 	 ddZd Zd Zd Zd Zy)	ProfilerHooka  Captures CPU/GPU profiling information every N steps or seconds.

  This produces files called "timeline-<step>.json", which are in Chrome
  Trace format.

  For more information see:
  https://github.com/catapult-project/catapult/blob/master/tracing/README.md
  Nc                     t         j                  j                  |d      | _        t	        j
                  |      | _        || _        || _        t        ||      | _
        y)a0  Initializes a hook that takes periodic profiling snapshots.

    `options.run_metadata` argument of `tf.Session.Run` is used to collect
    metadata about execution. This hook sets the metadata and dumps it in Chrome
    Trace format.


    Args:
      save_steps: `int`, save profile traces every N steps. Exactly one of
        `save_secs` and `save_steps` should be set.
      save_secs: `int` or `float`, save profile traces every N seconds.
      output_dir: `string`, the directory to save the profile traces to.
        Defaults to the current directory.
      show_dataflow: `bool`, if True, add flow events to the trace connecting
        producers and consumers of tensors.
      show_memory: `bool`, if True, add object snapshot events to the trace
        showing the sizes and lifetimes of tensors.
    ztimeline-{}.jsonrS   N)r   r   ry   _output_filer   r   _file_writer_show_dataflow_show_memoryr4   r[   )r   r   r   r  show_dataflowshow_memorys         r   r   zProfilerHook.__init__  sP    0 Z1CDD*..z:D'D#D#*6DKr    c                 r    d | _         t        j                         | _        | j                  t	        d      y )Nz2Global step should be created to use ProfilerHook.)r>  r   r   r   r   r   s    r   rk   zProfilerHook.begin  s7    DO,LLND'MNN (r    c                 $   | j                   d uxr% | j                  j                  | j                         | _        d| j                  i}| j                  r.t        j                  t
        j                  j                        nd }t        ||      S )Nr   )trace_level)options)	r>  r[   r(   r@  r   r   
RunOptions
FULL_TRACEr   )r   ro   rB  optss       r   rp   zProfilerHook.before_run$  s    t# 	=++DOO< 	 t778H    	**?*?*J*JK&* 	 (D11r    c                    |j                   d   }| j                  | j                  j                  |       |dz   }| j                  r|j
                  j                  | j                        }| j                  j                  |       | j                  || j                  j                  |      |j                  j                         | j                  j                  |j                  d|z         |dz   | _        y )Nr   r   zstep_%d)r   r>  r[   r+   r@  r   r   r   r   rm  r   run_metadata
step_statsrn  add_run_metadatar   s        r   r   zProfilerHook.after_run/  s    "**=9 kk,,->?#a'K''++D,D,DEk
kk,,[9
jjd//66{C((335
(()@)@)2[)@B "AoDOr    c                    t        j                  d||       t        j                  |d      5 }t	        j
                  |      }|j                  |j                  | j                  | j                               d d d        y # 1 sw Y   y xY w)Nz!Saving timeline for %d into '%s'.w)rq  rr  )
rv   rw   r   Openr   Timelinewritegenerate_chrome_trace_formatro  rp  )r   r'   	save_pathr|  ftraces         r   r   zProfilerHook._save@  s~    LL4dIF	Is	# Qq
+egg

,
, //T=N=N - PQQ Q Qs   ABB)NN TF)	r.   r/   r0   r1   r   rk   rp   r   r   r   r    r   rk  rk    s3     ! 6>O	2&"Qr    rk  c                 p   t        j                         }t        | t              s/t	        | d      r| j
                  |k7  rt        d| d|d      | S d| v r|j                  |       }|S |j                  | dz         }	 |j                  | dz          t        d| z        # t        t        f$ r Y |S w xY w)	zRetrieves Graph element.r   zPassed z< should have graph attribute that is equal to current graph .:z:0z:1zLName %s is ambiguous, as this `Operation` has multiple outputs (at least 2).)	r
   r   rT   strhasattrr   r8   as_graph_elementKeyError)objr   elements      r   rg   rg   I  s    



!%	C	3 CII$614e= > >JCZ$$S)G 
. $$S4Z0G.S4Z(  '),- . . j! 

 
.s    B" "B54B5)8r1   r   rA   numpyrs   %tensorflow.core.framework.summary_pb2r   tensorflow.core.protobufr   tensorflow.core.util.event_pb2r   tensorflow.python.clientr   tensorflow.python.frameworkr   r   r	   r
   tensorflow.python.opsr   r   tensorflow.python.platformr   r   rv   tensorflow.python.trainingr   r   +tensorflow.python.training.session_run_hookr   %tensorflow.python.training.summary_ior    tensorflow.python.util.tf_exportr   r   r   r   r4   rJ   SessionRunHookrQ   r   r   r   r   r   r  r   r(  r,  r9  rJ  rU  re  rk  rg   r   r    r   <module>r     s   
   9 / 5 - . . 2 + * 0 , < 7 4 F D 6	$ ! !H ()*@%
 @% +@%F
  ()*m (77 m  +m `,^=L/>> =L@ %&'<#%44 <# (<#~ ./0?	 ?	 1?	D *+,R*99 R -Rj &'(L/&55 L/ )L/^ 123' ' 4' $%&#$33 # '#> '()i'66 i *iX +,--+:: - .-` #$%(#22 ( &(V !"#0!00 0 $0" #$%QQ#22 QQ &QQhr    