
    AVh                        d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl dZdZdZdZdZdZ G d d      Z G d d      Zy)zClustering Operations.    )constant_op)dtypes)ops)random_seed)	array_ops)	check_ops)cond)control_flow_ops)gen_clustering_ops)math_ops)nn_impl)
random_ops)	state_ops)variable_v1)
while_loop)embedding_lookup)*squared_euclideancosinerandomkmeans_plus_pluskmc2clustersc                       e Zd ZdZeedddddfdZed        Zed	        Z	edd
       Z
d Zd Zd Zed        Zd Zd Zd Zd Zy)KMeansz)Creates the graph for k-means clustering.F   r         c
                    t         t        t        g}
t        |t              r||
vrt        d| d|
 d      t        t        g}||vrt        d| d| d      t        |t              r|n|g| _	        || _
        || _        || _        || _        t        |      | _        t!        j"                  |      d   | _        || _        |	| _        y)aB  Creates an object for generating KMeans clustering graph.

    This class implements the following variants of K-means algorithm:

    If use_mini_batch is False, it runs standard full batch K-means. Each step
    runs a single iteration of K-Means. This step can be run sharded across
    multiple workers by passing a list of sharded inputs to this class. Note
    however that a single step needs to process the full input at once.

    If use_mini_batch is True, it runs a generalization of the mini-batch
    K-means algorithm. It runs multiple iterations, where each iteration is
    composed of mini_batch_steps_per_iteration steps. Two copies of cluster
    centers are maintained: one that is updated at the end of each iteration,
    and one that is updated every step. The first copy is used to compute
    cluster allocations for each step, and for inference, while the second copy
    is the one updated each step using the mini-batch update rule. After each
    iteration is complete, this second copy is copied back the first copy.

    Note that for use_mini_batch=True, when mini_batch_steps_per_iteration=1,
    the algorithm reduces to the standard mini-batch algorithm. Also by setting
    mini_batch_steps_per_iteration = num_inputs / batch_size, the algorithm
    becomes an asynchronous version of the full-batch algorithm. Note however
    that there is no guarantee by this implementation that each input is seen
    exactly once per iteration. Also, different updates are applied
    asynchronously without locking. So this asynchronous version may not behave
    exactly like a full-batch version.

    Args:
      inputs: An input tensor or list of input tensors. It is assumed that the
        data points have been previously randomly permuted.
      num_clusters: An integer tensor specifying the number of clusters. This
        argument is ignored if initial_clusters is a tensor or numpy array.
      initial_clusters: Specifies the clusters used during initialization. One
        of the following: - a tensor or numpy array with the initial cluster
          centers. - a function f(inputs, k) that returns up to k centers from
          `inputs`.
        - "random": Choose centers randomly from `inputs`.
        - "kmeans_plus_plus": Use kmeans++ to choose centers from `inputs`.
        - "kmc2": Use the fast k-MC2 algorithm to choose centers from `inputs`.
          In the last three cases, one batch of `inputs` may not yield
          `num_clusters` centers, in which case initialization will require
          multiple batches until enough centers are chosen. In the case of
          "random" or "kmeans_plus_plus", if the input size is <= `num_clusters`
          then the entire batch is chosen to be cluster centers.
      distance_metric: Distance metric used for clustering. Supported options:
        "squared_euclidean", "cosine".
      use_mini_batch: If true, use the mini-batch k-means algorithm. Else assume
        full batch.
      mini_batch_steps_per_iteration: Number of steps after which the updated
        cluster centers are synced back to a master copy.
      random_seed: Seed for PRNG used to initialize seeds.
      kmeans_plus_plus_num_retries: For each point that is sampled during
        kmeans++ initialization, this parameter specifies the number of
        additional points to draw from the current distribution before selecting
        the best. If a negative value is specified, a heuristic is used to
        sample O(log(num_to_sample)) additional points.
      kmc2_chain_length: Determines how many candidate points are used by the
        k-MC2 algorithm to produce one new cluster centers. If a (mini-)batch
        contains less points, one new cluster center is generated from the
        (mini-)batch.

    Raises:
      ValueError: An invalid argument was passed to initial_clusters or
        distance_metric.
    z&Unsupported initialization algorithm `z`,must be one of `z`.zUnsupported distance metric `r   N)RANDOM_INITKMEANS_PLUS_PLUS_INIT	KMC2_INIT
isinstancestr
ValueErrorSQUARED_EUCLIDEAN_DISTANCECOSINE_DISTANCElist_inputs_num_clusters_initial_clusters_distance_metric_use_mini_batchint_mini_batch_steps_per_iterationrandom_seed_opsget_seed_seed_kmeans_plus_plus_num_retries_kmc2_chain_length)selfinputsnum_clustersinitial_clustersdistance_metricuse_mini_batchmini_batch_steps_per_iterationr   kmeans_plus_plus_num_retrieskmc2_chain_lengthinitialization_algorithmsdistance_metricss               T/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/ops/clustering_ops.py__init__zKMeans.__init__8   s    V "-.CY O"+3LL23C2D E67r;< < 3OD..66G H**:);2? @ @'56F8DL%D-D+D)D+./M+ND( ))+6q9DJ)ED&/D    c                     t        |t              sJ |t        k(  r| j                  ||      S |t        k(  r| j                  ||d      S J t        |             )a  Computes distance between each input and each cluster center.

    Args:
      inputs: list of input Tensors.
      clusters: cluster Tensor.
      distance_metric: distance metric used for clustering

    Returns:
      list of Tensors, where each element corresponds to each element in inputs.
      The value is the distance of each row to all the cluster centers.
      Currently only Euclidean distance and cosine distance are supported.
    T)inputs_normalized)r#   r(   r&   _compute_euclidean_distancer'   _compute_cosine_distancer$   )clsr6   r   r9   s       r@   _distance_graphzKMeans._distance_graph   sh     fd###44,,VX>>	O	+))
(d * 4 4 )C((UrB   c           
         g }|D ]  }t        j                  |d      5  t        j                  t        j                  |      dd      dt        j
                  ||d      z  z
  t        j                  t        j                  t        j                  |      dd            z   }|j                  |       ddd        |S # 1 sw Y   xY w)aC  Computes Euclidean distance between each input and each cluster center.

    Args:
      inputs: list of input Tensors.
      clusters: cluster Tensor.

    Returns:
      list of Tensors, where each element corresponds to each element in inputs.
      The value is the distance of each row to all the cluster centers.
    Tignore_existingr   )keepdimsr   transpose_bN)	r   colocate_withr   
reduce_sumsquarematmulr   	transposeappend)rG   r6   r   outputinpsquared_distances         r@   rE   z"KMeans._compute_euclidean_distance   s     F 
(S$7 	(  4a$GX4@@A##OOH-q4ABB 	 	&'	( 	(
( M	( 	(s   BCC	c           
         g }|s7t        j                  |d      5  t        j                  |d      }ddd       |D ]e  }t        j                  |d      5  |st        j                  |d      }|j	                  dt        j                  ||d      z
         ddd       g |S # 1 sw Y   uxY w# 1 sw Y   xY w)a  Computes cosine distance between each input and each cluster center.

    Args:
      inputs: list of input Tensor.
      clusters: cluster Tensor
      inputs_normalized: if True, it assumes that inp and clusters are
        normalized and computes the dot product which is equivalent to the
        cosine distance. Else it L2 normalizes the inputs first.

    Returns:
      list of Tensors, where each element corresponds to each element in inp.
      The value is the distance of each row to all the cluster centers.
    TrJ   r   axisNrM   )r   rO   r   l2_normalizerT   r   rR   )rG   r6   r   rD   rU   rV   s         r@   rF   zKMeans._compute_cosine_distance   s     FXt< :''q9: LS$7 L $$Sq1#a(//#xTJJKL LL
 M: :L Ls   B(AB4(B14B=	c           
         t        |t              sJ | j                  ||| j                        }g }| j                  t        k(  rG| j                         s7t        j                  |d      5  t        j                  |d      }ddd       t        ||      D ]  \  }}t        j                  |d      5  t        j                  ||d      \  }}| j                  t        k(  r|dz  }|j                  |t        j                  |dg      t        j                  |dg      f       ddd        t        | S # 1 sw Y   xY w# 1 sw Y   xY w)a  Maps input to closest cluster and the score.

    Args:
      inputs: list of input Tensors.
      clusters: Tensor of cluster centers.

    Returns:
      List of tuple, where each value in tuple corresponds to a value in inp.
      The tuple has following three elements:
      all_scores: distance of each input to each cluster center.
      score: distance of each input to closest cluster center.
      cluster_idx: index of cluster center closest to the corresponding input.
    TrJ   r   rY   Ng      ?)r#   r(   rH   r,   r'   _clusters_l2_normalizedr   rO   r   r[   zipr   nearest_neighborsrT   r   squeeze)	r5   r6   r   scoresrU   rV   scoreindices	distancess	            r@   _infer_graphzKMeans._infer_graph   sO    fd### !!&(D4I4IJFF0((* Xt< :''q9:&&) P
US$7 P(::3!L		  O3
s
)I%%i')d,-6->->w-MO	PP PP <: :P Ps   ,D85A1E8EE	c                 h    | j                   t        k(  xr | j                   xs | j                  dkD  S )z5Returns True if clusters centers are kept normalized.r   )r,   r'   r-   r/   r5   s    r@   r^   zKMeans._clusters_l2_normalized  s9    !!_4 7%%% 611A58rB   c                    t        j                  g d      }t        j                  |t        d      }t        j                  dt
        j                  d      }| j                  r| j                  dkD  rt        j                  |dd      }t        j                  | j                  t
        j                  d	      }t        j                  t        j                  |gt
        j                  
            }nK|}d}| j                  r9t        j                  t        j                  |gt
        j                  
            nd}|||||fS )a  Creates variables.

    Args:
      num_clusters: an integer Tensor providing the number of clusters.

    Returns:
      Tuple with following elements:
      - cluster_centers: a Tensor for storing cluster centers
      - cluster_centers_initialized: bool Variable indicating whether clusters
            are initialized.
      - cluster_counts: a Tensor for storing counts of points assigned to this
            cluster. This is used by mini-batch training.
      - cluster_centers_updated: Tensor representing copy of cluster centers
            that are updated every step.
      - update_in_steps: numbers of steps left before we sync
            cluster_centers_updated back to cluster_centers.
    N)shapeF)namevalidate_shapeinitialized)dtyperk   r   clusters_updatedupdate_in_stepsrn   )r   placeholder_with_defaultr   
VariableV1CLUSTERS_VAR_NAMEr   boolr-   r/   int64zerosones)r5   r7   
init_valuecluster_centerscluster_centers_initializedcluster_centers_updatedrp   cluster_countss           r@   _create_variableszKMeans._create_variables  s(   $ 33BdCJ!,,*5BO"-"8"8V[[}#6  D Dq H !, 6 6
-e!E $..

.
. "o
 #--
//<.
=?n !0o !! 
 
 nnl^6<<@B'+  8.#_6 6rB   c                     g }|D ]H  }t        j                  |d      5  |j                  t        j                  |d             ddd       J |S # 1 sw Y   VxY w)zNormalized the input data.TrJ   r   dimN)r   rO   rT   r   r[   )rG   r6   rU   rV   s       r@   _l2_normalize_datazKMeans._l2_normalize_data@  sb     F 8S$7 8g**3A678 88 M8 8s   'AA	c                    t        | j                  t              st        | j                        r,| j                  }t	        j
                  | j                        }n7t	        j
                  | j                        }t        j                  |      d   }| j                  }| j                  |      \  }}}}}t        | j                  ||| j                  | j                  | j                  | j                  |||
      j!                         }	|}
| j                  t"        k(  r8| j%                  |      }| j'                         st)        j*                  |
d      }
| j-                  ||
      \  }}}| j.                  rL| j1                  ||||      }|J t	        j2                  |g      5  | j5                  ||||      }ddd       n|
|k(  sJ | j7                  ||||      }|||||	fS # 1 sw Y   xY w)a%  Generate a training graph for kmeans algorithm.

    This returns, among other things, an op that chooses initial centers
    (init_op), a boolean variable that is set to True when the initial centers
    are chosen (cluster_centers_initialized), and an op to perform either an
    entire Lloyd iteration or a mini-batch of a Lloyd iteration (training_op).
    The caller should use these components as follows. A single worker should
    execute init_op multiple times until cluster_centers_initialized becomes
    True. Then multiple workers may execute training_op any number of times.

    Returns:
      A tuple consisting of:
      all_scores: A matrix (or list of matrices) of dimensions (num_input,
        num_clusters) where the value is the distance of an input vector and a
        cluster center.
      cluster_idx: A vector (or list of vectors). Each element in the vector
        corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      scores: Similar to cluster_idx but specifies the distance to the
        assigned cluster instead.
      cluster_centers_initialized: scalar indicating whether clusters have been
        initialized.
      init_op: an op to initialize the clusters.
      training_op: an op that runs an iteration of training.
    r   r   r   N)r#   r+   r$   callabler   convert_to_tensorr*   r   rj   r)   r~   _InitializeClustersOpFactoryr,   r2   r3   r4   opr'   r   r^   r   r[   rf   r-   _mini_batch_sync_updates_opcontrol_dependencies_mini_batch_training_op_full_batch_training_op)r5   r8   r7   r6   cluster_centers_varr{   total_countsr|   rp   init_oprz   
all_scoresrb   cluster_idxsync_updates_optraining_ops                   r@   training_graphzKMeans.training_graphI  s   6 	4))3/''(//**4+=+=>l..t/E/EF__%56q9l\\F ..|<5|*l$4d6K6K

D668O8O4#	% &(RT	 
 *O/&&v.f))+!..AF&*&7&7&P#J88
.0G
o (((##_$56 A226;3J3?AA A
  3333001<1DFk V-H[" "A As   'G((G1c                      j                   rZ j                  dkD  rKJ t        j                  d      5   fd}t	        j                  dk  |fd      cd d d        S t        j                         S # 1 sw Y   y xY w)Nr   TrJ   c                     t        j                  t        j                  j                  dz
        g      5  t        j
                  d      5  j                  t        k(  rt        j                  d      } n} d d d        t        j
                  d      5  t        j                  t        j                         g      5  t        j
                  d d      5  t        j                  t        j                  t        j                              g      5  t        j                        cd d d        cd d d        cd d d        cd d d        cd d d        S # 1 sw Y   xY w# 1 sw Y   nxY w	 d d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)Nr   TrJ   r   )r   r   r   assignr/   rO   r,   r'   r   r[   r   
zeros_likeidentity)rz   r|   r   r5   r   rp   s    r@   _fz.KMeans._mini_batch_sync_updates_op.<locals>._f  s    ''#CCaGI)  ? ""'? :&&/9")"6"6+#4 #:: ""#6M ?++##$7IJL ?&&tTB ?//&&|'0';';L'IK1  ? %--o>	? ?? ?? ?? ?? ?: :? ? ?? ? ?? ? ?? ? ?? ? ?s   G.E"?G+F.	F!>F	E.4	F	=	F	F.	G"E+	'G.E73F	;	FF	F	F.F"F.%	G.F7	3GGr   c                  0    t        j                   d      S )Nr   )r   
assign_sub)rp   s   r@   <lambda>z4KMeans._mini_batch_sync_updates_op.<locals>.<lambda>  s    I((!< rB   )r-   r/   r   rO   r	   r
   no_op)r5   rp   r   r|   r   r   s   ````` r@   r   z"KMeans._mini_batch_sync_updates_op  s     D Dq H(((_dC  >	? 	?8 yyq "<>= >  >D ##%%E >  >s   &B  B	c                 r   g }t        ||      D ]n  \  }}t        j                  |d      5  |J t        j                  |dg      }t        j
                  |      \  }}	t        j                  |      }
t        j                  |d      5  t        j                  ||      }ddd       t        j                  |d      5  t        j                  ||      }ddd       t        j                  t        j                  |	|j                        |	|
      }t        j                  ||	|
      }t        j                  t        j                  |
dg      t        j                  t        j                  t        j                  |      dz
  dg      t        j                         gd      }|t        j"                  t        j                  ||      |j                        z  z  }t        j$                  t        j"                  |z   |j                              }t        j                  ||      }||z  }ddd       t'        j(                  |      }t'        j(                  ||      }|j+                  ||g       q t-        j.                  | S # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   yxY w)a  Creates an op for training for mini batch case.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.
      total_counts: Tensor Ref of cluster counts.

    Returns:
      An op for doing an update of mini-batch k-means.
    TrJ   Nr]   rq   r   r   )r_   r   rO   r   reshapeuniquesizegatherr   unsorted_segment_sum	ones_likern   concatrx   rankr   int32cast
reciprocalr   scatter_addextendr
   group)r5   r6   cluster_idx_listrz   r   
update_opsrV   r   
unique_ids
unique_idxnum_unique_cluster_idx
old_countsold_cluster_centerscount_updatescluster_center_updatesbroadcast_shapelearning_rateupdate_countsupdate_cluster_centerss                      r@   r   zKMeans._mini_batch_training_op  s    J(89 0A[S$7 (0'''''bT: "+!1!1+!>
J!*
!;|TB 	B ''jA*	B E 	N ) 0 0* M
	N !55
,2D2DE.0 "*!>!>3"5 $**4qc:NN!!).."5"9A3?ll$,

  	(--m_=II#,#- 	- !++MM*}4cii@B!))-I-/Q(0T  ++L*,9;m(44_5?5K M (>?@a0Ab !!:..Q	B 	B	N 	N(0 (0s=   A J-J$J-J EJ-JJ- J*%J--J6	c                    g }g }t        j                  d|d   j                        }t        ||      D ]  \  }}	t	        j
                  |d      5  |j                  t        j                  ||	|             |j                  t        j                  t        j                  t        j                  t        j                  t        j                  |      d   dg            ddg      |	|             ddd        t	        j
                  |d      5  t        j                  |      t        j                  t        j                  |      |d   j                        |z   z  }
| j                         rt!        j"                  |
d	      }
ddd       t%        j&                  |
      S # 1 sw Y   ~xY w# 1 sw Y   ,xY w)
a  Creates an op for training for full batch case.

    Args:
      inputs: list of input Tensors.
      num_clusters: an integer Tensor providing the number of clusters.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.

    Returns:
      An op for doing an update of mini-batch k-means.
    gư>r   rq   TrJ   r]   r   Nr   )r   constantrn   r_   r   rO   rT   r   r   r   r   rx   rj   add_nr   r^   r   r[   r   r   )r5   r6   r7   r   rz   cluster_sumsr}   epsilonrV   r   new_clusters_centerss              r@   r   zKMeans._full_batch_training_op  s    LN""4vay?G(89 	:[S$7 :))#{LI	K))!!NN!)))//#*>q*AB4HJG *<	9	:: :	: 
		?D	A Q%^^L9
--~6Q8M8M
N
 
	%	%	'&334HaPQ O-ABB: :Q Qs   B!F)A7F6)F3	6F?N)T)__name__
__module____qualname____doc__r    r&   rA   classmethodrH   rE   rF   rf   r^   r~   r   r   r   r   r    rB   r@   r   r   5   s    1
 !,9#./,-!$^0@ ) ),  4  2%N8-6^  D"L&&PA/F"CrB   r   c                   L    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zy)r   a  Internal class to create the op to initialize the clusters.

    The op performs this algorithm (see constructor args):

    num_remaining = num_clusters - length(cluster_centers)
    if num_remaining == 0:
      assert that cluster_centers_initialized is true
    else:
      assert that num_remaining > 0
      new_centers = choose up to num_remaining initial centers
      l2-normalize new_centers if using cosine distance
      all_centers = concat(cluster_centers, new_centers)
      cluster_centers := all_centers
      if there is a cluster_centers_updated variable:
        cluster_centers_updated := cluster_centers
      num_now_remaining = num_clusters - length(cluster_centers)
      if num_now_remaining == 0:
        cluster_centers_initialized := true
  c                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        t        j                  | j                        d   | _        | j                  | j                  z
  | _        t        j                  | j                   D cg c]  }t        j                  |      d    c}      | _        yc c}w )aw  Creates an op factory.

    Args:
      inputs: See KMeans constructor.
      num_clusters: An integer Tensor providing the number of clusters.
      initial_clusters: See KMeans constructor.
      distance_metric: See KMeans constructor.
      random_seed: See KMeans constructor.
      kmeans_plus_plus_num_retries: See KMeans constructor.
      kmc2_chain_length: See KMeans constructor.
      cluster_centers: The TF variable holding the initial centers. It may
        already contain some centers when the op is executed.
      cluster_centers_updated: A second TF variable to hold a copy of the
        initial centers, used for full-batch mode. In mini-batch mode,
        cluster_centers_updated is the same variable as cluster_centers.
      cluster_centers_initialized: A boolean TF variable that will be set to
        true when all the initial centers have been chosen.
    r   N)r)   r*   r+   r,   r2   r3   r4   _cluster_centers_cluster_centers_updated_cluster_centers_initializedr   rj   _num_selected_num_remainingr   r   	_num_data)r5   r6   r7   r8   r9   r   r<   r=   rz   r|   r{   is               r@   rA   z%_InitializeClustersOpFactory.__init__6  s    . DL%D-D+DDJ)ED&/D+D$;D!(CD%")>)>?BD,,t/A/AAD^^(,51	A	57DN5s   )Cc                 ,   t        j                  t        j                  | j                  dg      dt        j                  | j                  t        j                        | j                  t        j                        }t        | j                  |d      S )Nr]   r   )minvalmaxvalseedrn   div)partition_strategy)r   random_uniformr   r   r   r   r   r   r   rv   r2   r   r)   )r5   rd   s     r@   _randomz$_InitializeClustersOpFactory._random]  sf    ''$--t4}}T^^V\\:ZZllG DLL'eLLrB   c                 $   | j                   d   }| j                  t        k(  rt        j                  |d      }t        j                  |t        j                  | j                  t        j                        | j                  | j                        S )Nr   r   r   )r)   r,   r'   r   r[   r   kmeans_plus_plus_initializationr   r   r   r   rv   r2   r3   )r5   rV   s     r@   _kmeans_plus_plusz._InitializeClustersOpFactory._kmeans_plus_plusf  sm     ,,q/C/  !,c==X]]4..=tzz**, ,rB   c                 t     j                   d   t        j                        d   }t        j                  | j
                  z  t        j                        }t        j                  t        j                   j                  |      d      fd} fd}t        j                  ||ddg      \  }}|S )aq  Adds new initial cluster centers using the k-MC2 algorithm.

    In each call to the op, the provided batch is split into subsets based on
    the specified `kmc2_chain_length`. On each subset, a single Markov chain of
    the k-MC2 algorithm is used to add *one* new center cluster center. If there
    are less than `kmc2_chain_length` points in the subset, a single center is
    added using one Markov chain on the full input. It is assumed that the
    provided batch has previously been randomly permuted. Otherwise, k-MC2 may
    return suboptimal centers.

    Returns:
      An op that adds new cluster centers.
    r   rq   r   c                 0    t        j                  |       S )z&Stopping condition for the while loop.)r   less)r   _num_to_samples     r@   _condzB_InitializeClustersOpFactory._kmc2_multiple_centers.<locals>._cond  s    ]]1m,,rB   c                     fd} fd}t        j                   t        j                  j                  d      ||      }t	        j
                  j                  |d      }j                  j                  ur"t	        j
                  j                  |d      } dz   j                  t        j                  |      d   z
  fS )z5Body that adds a single new center based on a subset.c                      t        j                  d   ddg      } j                  t        k(  rt	        j
                  | d      } | S )z+Returns a random point as a cluster center.r   r   r]   r   )r   r   r,   r'   r   r[   )
new_centerfirst_shardr5   s    r@   _sample_randomzZ_InitializeClustersOpFactory._kmc2_multiple_centers.<locals>._body.<locals>._sample_random  sE     &&{1~2w?
  O3++JA>*rB   c                     	j                   z  } | 	j                   z   }| | }t        j                  |	j                  d      \  }}t        j                  t        j                  |      	j                        }t        j                  ||   ddg      }	j                  t        k(  rt        j                  |d      }t        j                  	j                  |gd      S )zEReturns previous centers as well as a new center sampled using k-MC2.r   r]   r   r   )r4   r   r`   r   kmc2_chain_initializationr   ra   r2   r   r,   r'   r   r[   r   )
startendsubsetr   re   new_center_indexnewly_sampled_centerr   r   r5   s
          r@   _sample_kmc2_chainz^_InitializeClustersOpFactory._kmc2_multiple_centers.<locals>._body.<locals>._sample_kmc2_chain  s     D+++d---U3');;D))1.9 .GGi($**6  )008H1I23R :   O3!(!5!5""+
!6!68L M !# 	#rB   r   Frl   r   )r	   r   equalr   r   r   r   r   r*   r   rj   )r   r   r   r   new_centersassigned_centersr   r5   s   `     r@   _bodyzB_InitializeClustersOpFactory._kmc2_multiple_centers.<locals>._body  s    #0 II
..++Q
/
k #))


UD		&	&d.C.C	C$++)) " UD&&9I)J1)MMMMrB   )r)   r   rj   r   r   r4   r   r   maximumminimumr   r   )	r5   
batch_sizemax_to_sampler   r   r   num_remainingr   r   s	   `      @@r@   _kmc2_multiple_centersz3_InitializeClustersOpFactory._kmc2_multiple_centersp  s     ,,q/K-a0J MMT,,,FLLBM $$,,m<aAM-/Nd ",,UEAq6BA}rB   c                 f     t        j                    j                   j                  k   fd|      S )Nc                  D    t        j                   j                  d      S Nr   )r   r   r)   rh   s   r@   r   zD_InitializeClustersOpFactory._greedy_batch_sampler.<locals>.<lambda>  s    Y--dllA> rB   )r	   r   r   r5   samplers   ` r@   _greedy_batch_samplerz2_InitializeClustersOpFactory._greedy_batch_sampler  s-    
 99T^^t':'::> rB   c                     t        j                  t        j                  | j                  | j
                        g      5   |       cd d d        S # 1 sw Y   y xY wN)r   r   r   assert_greater_equalr   r   r   s     r@   _single_batch_samplerz2_InitializeClustersOpFactory._single_batch_sampler  sJ     
	!	!		'	'8K8K	LM
O Y  s   AAc                 *   t        | j                  t              rI| j                  t        k(  r| j	                  | j
                        S | j                  | j                        S t        | j                        r&| j                  | j                  | j                        S t        j                  t        j                  | j                  t        j                   | j                        d         g      5  | j                  cd d d        S # 1 sw Y   y xY wr   )r#   r+   r$   r    r   r   r  r   r   r)   r   r   r   r   assert_equalr   rj   rh   s    r@   _choose_initial_centersz4_InitializeClustersOpFactory._choose_initial_centers  s    $((#.			;	.))$,,77))$*@*@AA	$((	)##DLL$2E2EFF##

 
 !4!4!*1G1G!H!KM% 	 & %%	& & &s   3D		Dc                      j                          j                  t        k(  rt        j                  d      t        j
                  t        j                   j                  d      fd fd      }t        j                   j                  |d      } j                   j                  ur"t        j                   j                  |d      } j                  t        j                  |      d   z
  S )z>Adds some centers and returns the number of centers remaining.r   r   r   c                       S r   r   )r   s   r@   r   z?_InitializeClustersOpFactory._add_new_centers.<locals>.<lambda>  s    { rB   c                  H    t        j                  j                   gd      S r   )r   r   r   )r   r5   s   r@   r   z?_InitializeClustersOpFactory._add_new_centers.<locals>.<lambda>  s    	  $"7"7!EqI rB   Fr   )r  r,   r'   r   r[   r	   r   r   r   r   r   r   r   r*   r   rj   )r5   all_centersar   s   `  @r@   _add_new_centersz-_InitializeClustersOpFactory._add_new_centers  s    ..0K/((!<k))t))1-/BIKK 	{5	BA$$D,A,AA




'
'5Ba	 21 555rB   c                 v    t        j                  t        j                   j                        g      5   j
                  t        k(  r j                         }n j                         }t        j                  t        j                  |d       fdt        j                        cd d d        S # 1 sw Y   y xY w)Nr   c                  D    t        j                   j                  d      S NT)r   r   r   rh   s   r@   r   z:_InitializeClustersOpFactory._initialize.<locals>.<lambda>  s    )""4#D#DdK rB   )r   r   r   assert_positiver   r+   r"   r   r
  r	   r   r   r
   r   )r5   num_now_remainings   ` r@   _initializez(_InitializeClustersOpFactory._initialize  s    		!	!!!$"5"56# 
 
" 
		9	, 779 113YY
..*A
.
K

 
 "
" 
" 
"s   A0B//B8c                      t        j                   t        j                   j                  d       fd j                        S )z#Returns the cluster initializer op.r   c                  D    t        j                   j                  d      S r  )r   r  r   rh   s   r@   r   z1_InitializeClustersOpFactory.op.<locals>.<lambda>  s    	&&t'H'H$O rB   )r	   r   r   r   r  rh   s   `r@   r   z_InitializeClustersOpFactory.op  s4    99t**A.O rB   N)r   r   r   r   rA   r   r   r   r   r  r  r
  r  r   r   rB   r@   r   r     s=    ,%7NM,Qf&6""rB   r   N)r   tensorflow.python.frameworkr   r   r   r   r0   tensorflow.python.opsr   r   r	   r
   r   r   r   r   r   r   r   #tensorflow.python.ops.embedding_opsr   (tensorflow.python.ops.gen_clustering_opsr&   r'   r    r!   r"   rt   r   r   r   rB   r@   <module>r     s     3 . + F + + & 2 4 * ) , + - , @ 7 1  * 	 gC gCTg grB   