
    BVh`              
       >   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ	 ddl
mZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ  edg       G d d ej,                  dg d                   Zd ZddZ	 	 	 	 	 ddZd Zd Z edg      	 	 	 dd       Zy)z,Utilities to warm-start TF.Learn Estimators.    N)errors)ops)	state_ops)variable_scope)	variables)
tf_logging)checkpoint_ops)checkpoint_utils)saver)saveable_object_util)	tf_exportztrain.VocabInfo)v1c                   *     e Zd ZdZ	 	 	 d fd	Z xZS )	VocabInfoa
  Vocabulary information for warm-starting.

  See `tf.estimator.WarmStartSettings` for examples of using
  VocabInfo to warm-start.

  Args:
    new_vocab: [Required] A path to the new vocabulary file (used with the model
      to be trained).
    new_vocab_size: [Required] An integer indicating how many entries of the new
      vocabulary will used in training.
    num_oov_buckets: [Required] An integer indicating how many OOV buckets are
      associated with the vocabulary.
    old_vocab: [Required] A path to the old vocabulary file (used with the
      checkpoint to be warm-started from).
    old_vocab_size: [Optional] An integer indicating how many entries of the old
      vocabulary were used in the creation of the checkpoint. If not provided,
      the entire old vocabulary will be used.
    backup_initializer: [Optional] A variable initializer used for variables
      corresponding to new vocabulary entries and OOV. If not provided, these
      entries will be zero-initialized.
    axis: [Optional] Denotes what axis the vocabulary corresponds to.  The
      default, 0, corresponds to the most common use case (embeddings or
      linear weights for binary classification / regression).  An axis of 1
      could be used for warm-starting output layers with class vocabularies.

  Returns:
    A `VocabInfo` which represents the vocabulary information for warm-starting.

  Raises:
    ValueError: `axis` is neither 0 or 1.

      Example Usage:
```python
      embeddings_vocab_info = tf.VocabInfo(
          new_vocab='embeddings_vocab',
          new_vocab_size=100,
          num_oov_buckets=1,
          old_vocab='pretrained_embeddings_vocab',
          old_vocab_size=10000,
          backup_initializer=tf.compat.v1.truncated_normal_initializer(
              mean=0.0, stddev=(1 / math.sqrt(embedding_dim))),
          axis=0)

      softmax_output_layer_kernel_vocab_info = tf.VocabInfo(
          new_vocab='class_vocab',
          new_vocab_size=5,
          num_oov_buckets=0,  # No OOV for classes.
          old_vocab='old_class_vocab',
          old_vocab_size=8,
          backup_initializer=tf.compat.v1.glorot_uniform_initializer(),
          axis=1)

      softmax_output_layer_bias_vocab_info = tf.VocabInfo(
          new_vocab='class_vocab',
          new_vocab_size=5,
          num_oov_buckets=0,  # No OOV for classes.
          old_vocab='old_class_vocab',
          old_vocab_size=8,
          backup_initializer=tf.compat.v1.zeros_initializer(),
          axis=0)

      #Currently, only axis=0 and axis=1 are supported.
  ```
  c           
          |dk7  r|dk7  rt        dj                  |            t        t        |   | |||||||      S )Nr      OThe only supported values for the axis argument are 0 and 1.  Provided axis: {})
ValueErrorformatsuperr   __new__)	cls	new_vocabnew_vocab_sizenum_oov_buckets	old_vocabold_vocab_sizebackup_initializeraxis	__class__s	           ]/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/training/warm_starting_util.pyr   zVocabInfo.__new__l   s]     qyTQY 3396$<A A C(	 	    )Nr   )__name__
__module____qualname____doc__r   __classcell__)r    s   @r!   r   r       s    ?L  !% r"   r   )r   r   r   r   r   r   r   c                     t        j                  |       }t        |      dkD  rt        d| d|      t	        |j                               d   S )aX  Returns name of the `var`.

  Args:
    var: A list. The list can contain either of the following:
      (i) A single `Variable`
      (ii) A single `ResourceVariable`
      (iii) Multiple `Variable` objects which must be slices of the same larger
        variable.
      (iv) A single `PartitionedVariable`

  Returns:
    Name of the `var`
  r   z`var` = z= passed as arg violates the constraints.  name_to_var_dict = r   )r   op_list_to_dictlen	TypeErrorlistkeys)varname_to_var_dicts     r!   _infer_var_namer1      sX     *99#>	Q
/24DF G G	##%	&q	))r"   c                 f   t        j                  |       rt        | g      }nt        | t              rt        d | D              rt        |       }nZt        | t        j                        rt        | g      }| j                         } n#t        dj                  t        |                   |s|}|| fS )a  Helper method for standarizing Variable and naming.

  Args:
    var: Current graph's variable that needs to be warm-started (initialized).
      Can be either of the following: (i) `Variable` (ii) `ResourceVariable`
      (iii) list of `Variable`: The list must contain slices of the same larger
        variable. (iv) `PartitionedVariable`
    prev_tensor_name: Name of the tensor to lookup in provided `prev_ckpt`. If
      None, we lookup tensor with same name as given `var`.

  Returns:
    A tuple of the Tensor name and var.
  c              3   F   K   | ]  }t        j                  |        y wNr
   _is_variable.0vs     r!   	<genexpr>z _get_var_info.<locals>.<genexpr>        :))!,:   !`var MUST be one of the following: a Variable, list of Variable or PartitionedVariable, but is {})r
   r6   r1   
isinstancer-   allvariables_libPartitionedVariable_get_variable_listr,   r   type)r/   prev_tensor_namecurrent_var_names      r!   _get_var_inforF      s     ""3'&u-3:c::&s+#}889&u-

 
 
"C
	))/S	):< < 
'	3	r"   c
                    |r|r|r|st        d      t        j                  |       r| g} nst        | t              rt        d | D              r| } nNt        | t        j                        r| j                         } n#t        dj                  t        |                   |st        |       }t        d | D              }
| D ]  }|j                         j                         }|j!                         }d}|r+t#        j$                  |j&                  |j(                        }|	dk(  r|}|d   }|}|}|}d}d}|}d}n2|	dk(  r|
}|}d	}d}d}|}|}d}|}nt        d
j                  |	            t+        j,                  t        j.                  |      |||||||||||      }t1        j2                   |||            }t5        j6                  ||      |_         y)a  Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.

  Use this method when the `var` is backed by vocabulary. This method stitches
  the given `var` such that values corresponding to individual features in the
  vocabulary remain consistent irrespective of changing order of the features
  between old and new vocabularies.

  Args:
    var: Current graph's variable that needs to be warm-started (initialized).
      Can be either of the following:
      (i) `Variable`
      (ii) `ResourceVariable`
      (iii) list of `Variable`: The list must contain slices of the same larger
        variable.
      (iv) `PartitionedVariable`
    current_vocab_path: Path to the vocab file used for the given `var`.
    current_vocab_size: An `int` specifying the number of entries in the current
      vocab.
    prev_ckpt: A string specifying the directory with checkpoint file(s) or path
      to checkpoint. The given checkpoint must have tensor with name
      `prev_tensor_name` (if not None) or tensor with name same as given `var`.
    prev_vocab_path: Path to the vocab file used for the tensor in `prev_ckpt`.
    previous_vocab_size: If provided, will constrain previous vocab to the first
      `previous_vocab_size` entries.  -1 means use the entire previous vocab.
    current_oov_buckets: An `int` specifying the number of out-of-vocabulary
      buckets used for given `var`.
    prev_tensor_name: Name of the tensor to lookup in provided `prev_ckpt`. If
      None, we lookup tensor with same name as given `var`.
    initializer: Variable initializer to be used for missing entries.  If None,
      missing entries will be zero-initialized.
    axis: Axis of the variable that the provided vocabulary corresponds to.

  Raises:
    ValueError: If required args are not provided.
  zgInvalid args: Must provide all of [current_vocab_path, current_vocab_size, prev_ckpt, prev_vocab_path}.c              3   F   K   | ]  }t        j                  |        y wr4   r5   r7   s     r!   r:   z-_warm_start_var_with_vocab.<locals>.<genexpr>   r;   r<   r=   c              3   ^   K   | ]%  }|j                         j                         d     ' yw)r   N)	get_shapeas_listr7   s     r!   r:   z-_warm_start_var_with_vocab.<locals>.<genexpr>   s$     C!1;;=00215Cs   +-N)
full_shape
var_offsetr   r   r#   r   )	ckpt_pathold_tensor_namenew_row_vocab_sizenew_col_vocab_sizeold_row_vocab_sizeold_row_vocab_filenew_row_vocab_fileold_col_vocab_filenew_col_vocab_filenum_row_oov_bucketsnum_col_oov_bucketsinitializer)shapepartition_info)r   r
   r6   r>   r-   r?   r@   rA   rB   r,   r   rC   r1   sumrJ   rK   _get_save_slice_infor   _PartitionInforL   rM   r	   "_load_and_remap_matrix_initializer_get_checkpoint_filenamer   convert_to_tensorr   assign_initializer_op)r/   current_vocab_pathcurrent_vocab_size	prev_ckptprev_vocab_pathprevious_vocab_sizecurrent_oov_bucketsrD   rY   r   total_v_first_axisr9   v_shape
slice_infor[   rP   rQ   rR   rS   rT   rU   rV   rW   rX   initnew_init_vals                             r!   _warm_start_var_with_vocabro      s#   Z !3	

 H I I""3'%C3:c::
C#}889

 
 
"C
	))/S	):< < 
&s+CsCC 2:akkm##%G'')JN%44**z7L7LNn qy-"1:.*-/	 .-*-/ 3396$<A A <<";;IF(-------//!D ((7>:<L!((L9Ae2:r"   c           
         t        | t              s| Et        j                  d       t	        j
                  t        j                  j                  |       }nt        | t              rt        d | D              r<g }| D ]4  }|t	        j
                  t        j                  j                  |      z  }6 nit        d | D              r| }nTt        dj                  | D cg c]  }t        |       c}            t        dj                  t        |                   i }|D ]C  }t        |t              s|gn|}t        |      }|j                  |g       j!                  |       E |S c c}w )a  Collects and groups (possibly partitioned) variables into a dictionary.

  The variables can be provided explicitly through vars_to_warm_start, or they
  are retrieved from collections (see below).

  Args:
    vars_to_warm_start: One of the following:

      - A regular expression (string) that captures which variables to
        warm-start (see tf.compat.v1.get_collection).  This expression will
        only consider variables in the TRAINABLE_VARIABLES collection.
      - A list of strings, each representing a full variable name to warm-start.
        These will consider variables in GLOBAL_VARIABLES collection.
      - A list of Variables to warm-start.
      - `None`, in which case all variables in TRAINABLE_VARIABLES will be used.
  Returns:
    A dictionary mapping variable names (strings) to lists of Variables.
  Raises:
    ValueError: If vars_to_warm_start is not a string, `None`, a list of
      `Variables`, or a list of strings.
  z4Warm-starting variables only in TRAINABLE_VARIABLES.)scopec              3   <   K   | ]  }t        |t                y wr4   )r>   strr7   s     r!   r:   z)_get_grouped_variables.<locals>.<genexpr>U  s     
:!:a
:s   c              3   F   K   | ]  }t        j                  |        y wr4   r5   r7   s     r!   r:   z)_get_grouped_variables.<locals>.<genexpr>Z  s     J!**1-Jr<   z^If `vars_to_warm_start` is a list, it must be all `Variable` or all `str`.  Given types are {}z@`vars_to_warm_start must be a `list` or `str`.  Given type is {})r>   rs   logginginfor   get_collection	GraphKeysTRAINABLE_VARIABLESr-   r?   GLOBAL_VARIABLESr   r   rC   r1   
setdefaultappend)vars_to_warm_startlist_of_varsr9   grouped_variablestvar_names         r!   _get_grouped_variablesr   7  sd   . "C(,>,F LLGH%%))1CEL$d+

:'9
::l! 5!**MM**!5 	55 
J7IJ	J'l FFLf-?@DG@GBC C  ""(&.@)A"BD D  9aa&AAq!H  2.55a89
 
 As   E+c                 B   t        j                  |       }	 t        j                  |      }t        |      t        |j                               z
  }|rt        dj                  |            |D ci c]  }|||   
 c}S # t        j
                  $ r i cY S w xY wc c}w )a#  Returns a dictionary mapping variable names to checkpoint keys.

  The warm-starting utility expects variable names to match with the variable
  names in the checkpoint. For object-based checkpoints, the variable names
  and names in the checkpoint are different. Thus, for object-based checkpoints,
  this function is used to obtain the map from variable names to checkpoint
  keys.

  Args:
    path: path to checkpoint directory or file.
    variable_names: list of variable names to load from the checkpoint.

  Returns:
    If the checkpoint is object-based, this function returns a map from variable
    names to their corresponding checkpoint keys.
    If the checkpoint is name-based, this returns an empty dict.

  Raises:
    ValueError: If the object-based checkpoint is missing variables.
  zAttempting to warm-start from an object-based checkpoint, but found that the checkpoint did not contain values for all variables. The following variables were missing: {})
r
   r`   	saver_libobject_graph_key_mappingr   NotFoundErrorsetr.   r   r   )pathvariable_namesfnamenames_to_keysmissing_namesnames         r!   _get_object_checkpoint_renamesr   n  s    * 
3
3D
9%66u=M n%M,>,>,@(AA-
	/ 
		   
 1?	?$d#
#	?? 
		  I 
@s   B 1BBBztrain.warm_startc                 *   t        j                  dj                  |              t        |      }|i }|st	        | |j                               }d}t               }t               }i }|j                         D ]  \  }	}
|j                  |	      }|r|j                  |	       |j                  |	      }|r|j                  |	       |dz  }t        j                  dj                  |	|j                  |j                  |j                  |j                  dkD  r|j                  nd|j                  |xs d|j                   xs d             t#        |
|j                  |j                  | |j                  |j                  |j                  ||j                   |j$                  	
       1|s5|dz  }t        j                  d
j                  |	|xs d             t'        |
      dk(  r|
d   }
t)        |
|      \  }}||v rOt        j                  dj                  |||   |             t+        j,                  | |       |j/                          |||<    |rt+        j,                  | |       t        |j                               |z
  }t        |j                               |z
  }t        j                  d|       |r)t1        dj                  ||j                                     |r)t1        dj                  ||j                                     y)a	  Warm-starts a model using the given settings.

  If you are using a tf.estimator.Estimator, this will automatically be called
  during training.

  Args:
    ckpt_to_initialize_from: [Required] A string specifying the directory with
      checkpoint file(s) or path to checkpoint from which to warm-start the
      model parameters.
    vars_to_warm_start: [Optional] One of the following:

      - A regular expression (string) that captures which variables to
        warm-start (see tf.compat.v1.get_collection).  This expression will only
        consider variables in the TRAINABLE_VARIABLES collection -- if you need
        to warm-start non_TRAINABLE vars (such as optimizer accumulators or
        batch norm statistics), please use the below option.
      - A list of strings, each a regex scope provided to
        tf.compat.v1.get_collection with GLOBAL_VARIABLES (please see
        tf.compat.v1.get_collection).  For backwards compatibility reasons,
        this is separate from the single-string argument type.
      - A list of Variables to warm-start.  If you do not have access to the
        `Variable` objects at the call site, please use the above option.
      - `None`, in which case only TRAINABLE variables specified in
        `var_name_to_vocab_info` will be warm-started.

      Defaults to `'.*'`, which warm-starts all variables in the
      TRAINABLE_VARIABLES collection.  Note that this excludes variables such
      as accumulators and moving statistics from batch norm.
    var_name_to_vocab_info: [Optional] Dict of variable names (strings) to
      `tf.estimator.VocabInfo`. The variable names should be "full" variables,
      not the names of the partitions.  If not explicitly provided, the variable
      is assumed to have no (changes to) vocabulary.
    var_name_to_prev_var_name: [Optional] Dict of variable names (strings) to
      name of the previously-trained variable in `ckpt_to_initialize_from`. If
      not explicitly provided, the name of the variable is assumed to be same
      between previous checkpoint and current model.  Note that this has no
      effect on the set of variables that is warm-started, and only controls
      name mapping (use `vars_to_warm_start` for controlling what variables to
      warm-start).

  Raises:
    ValueError: If the WarmStartSettings contains prev_var_name or VocabInfo
      configuration for variable names that are not used.  This is to ensure
      a stronger check for variable configuration than relying on users to
      examine the logs.
  zWarm-starting from: {}Nr   r   zWarm-starting variable: {}; current_vocab: {} current_vocab_size: {} prev_vocab: {} prev_vocab_size: {} current_oov: {} prev_tensor: {} initializer: {}All	Unchangedzzero-initialized)	rd   re   rf   rg   rh   ri   rD   rY   r   z-Warm-starting variable: {}; prev_var_name: {}zSRequested prev_var_name {} initialize both {} and {}; calling init_from_checkpoint.zWarm-started %d variables.zYou provided the following variables in var_name_to_prev_var_name that were not used: {0}.  Perhaps you misspelled them?  Here is the list of viable variable names: {1}zYou provided the following variables in var_name_to_vocab_info that were not used: {0}.  Perhaps you misspelled them?  Here is the list of viable variable names: {1})ru   rv   r   r   r   r.   r   itemsgetadddebugr   r   r   r   r   r   ro   r   r+   rF   r
   init_from_checkpointclearr   )ckpt_to_initialize_fromr}   var_name_to_vocab_infovar_name_to_prev_var_namer   warmstarted_countprev_var_name_usedvocab_info_usedvocabless_varsr   variableprev_var_name
vocab_inforD   r/   prev_var_name_not_usedvocab_info_not_useds                    r!   
warm_startr     s/   f 
,,'../FGH,-?@#	"
 !?!2!7!7!9!;  uE/ .-335 8/h-11(;MX&'++H5J(#1mm#V
,,j.G.G""%/%>%>%B &0%>%>HM((-*F;++A/ACD !
'11'66+$..(77(88( 33
  
QELLm2{4 	5
 x=Aa[( -h F#~- -- 88>,*+;<9!"
 
/
/0G0>@



 +.'(q8/t ))*A*8:$$&(*<=2779:_L	,,+->?
	 %f%;%6%;%;%=?	@ @ 
	 V/1B1G1G1IJ	L L r"   r4   )r#   r   NNr   )z.*NN)r'   collectionstensorflow.python.frameworkr   r   tensorflow.python.opsr   r   r   r@   tensorflow.python.platformr   ru   tensorflow.python.trainingr	   r
   r   r   !tensorflow.python.training.savingr    tensorflow.python.util.tf_exportr   
namedtupler   r1   rF   ro   r   r   r    r"   r!   <module>r      s    3  . + + 0 < < 5 7 9 B 6  !"`K; ) ` #`F**R 463404+/$%t:t4n$@N !"#"&&*)-[L $[Lr"   