
    BVhB"              
          d Z ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ dZdZdZdZdZdZ ed       G d d ej<                  dg d                   Z	 	 ddZ d Z!d Z"d Z#y)z+TPU system metadata and associated tooling.    N)
config_pb2)session)device_util)contextconfig)device)errors)ops)
tf_logging)tpu)	tf_exporti i   
tpu_workercoordinator) localz"tpu.experimental.TPUSystemMetadatac                   "     e Zd ZdZ fdZ xZS )TPUSystemMetadataa  Describes some metadata about the TPU system.

  Attributes:
    num_cores: interger. Total number of TPU cores in the TPU system.
    num_hosts: interger. Total number of hosts (TPU workers) in the TPU system.
    num_of_cores_per_host: interger. Number of TPU cores per host (TPU worker).
    topology: an instance of `tf.tpu.experimental.Topology`, which describes the
      physical topology of TPU system.
    devices: a tuple of strings, which describes all the TPU devices in the
      system.
  c                 4    t         t        |   | |||||      S N)superr   __new__)cls	num_cores	num_hostsnum_of_cores_per_hosttopologydevices	__class__s         Y/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/tpu/tpu_system_metadata.pyr   zTPUSystemMetadata.__new__=   s*    "!9i9N&11    )__name__
__module____qualname____doc__r   __classcell__)r   s   @r    r   r   (   s    
1 1r!   r   r   r   r   r   r   c           	      >   d}g }t        j                  t              }t        j                         rat        j                         }|D cg c]A  }t        j                  t        j                  |j                        |j                  dd      C }}nd}	 t        j                  d|        	 t        j                          j#                         5  t        j$                  | t'        t(        |            5 }	|	j+                         }	 ddd       ddd       |D ]h  }t6        j8                  j;                  |j                        }|j                  d	k(  s<||j<                     j?                  |j@                         |dz  }j d}|rhtC        |jE                         D cg c]  }tG        |       c}      }tG        |      dk7  rtI        d
jK                  |            |jM                         }d}|r)|stI        djK                  | |            tO        | |      }d }tQ        tS        ||            }tU        |tG        |      |||      }|rt        j                  d       t        j                  d|jV                         t        j                  d|jX                         t        j                  d|jZ                         |j\                  D ]  }t        j                  d|        |S t        j                  d|       |S c c}w # 1 sw Y   nxY w	 ddd       n# 1 sw Y   nxY wni# t,        j.                  $ rS d| z  }
|t0        k  r7t        j2                  d|
       t        j2                  d|t0               |dz  }nt5        |
      Y nw xY wc c}w )z<Automatically detects the TPU system metadata in the system.r      z8Querying Tensorflow master (%s) for TPU system metadata.r   NzFailed to connect to the Tensorflow master. The TPU worker may not be ready (still scheduling) or the Tensorflow master address is incorrect: got (%s).z%szRetrying (%d/%d).TPUzHTPU cores on each host is not same. This should not happen!. devices: {}zCannot find any TPU cores in the system (master address {}). This usually means the master address is incorrect or the TPU worker has some problems. Available devices: {}c                     t         j                  j                  | j                        }|j                  |j
                  |j                  |j                  |j                  fS r   )		tf_device
DeviceSpecfrom_stringnamejobreplicataskdevice_typedevice_index)r	   specs     r    	_sort_keyz-_query_tpu_system_metadata.<locals>._sort_key   sJ    ++FKK8DHHdllDIIt/?/? r!   )keyr'   zFound TPU system:z*** Num TPU Cores: %dz*** Num TPU Workers: %dz *** Num TPU Cores Per Worker: %dz*** Available Device: %szFailed to find TPU: %s)/collectionsdefaultdictlistr   executing_eagerlyr   list_logical_devicessession_lib_DeviceAttributesr   canonicalizer/   r3   logginginfor   Graph
as_defaultSessionget_session_config_with_timeout_PINGING_MASTER_TIMEOUT_IN_MSlist_devicesr
   DeadlineExceededError_RETRY_TIMESwarning
ValueErrorr,   r-   r.   r2   appendr4   setvalueslenRuntimeErrorformatpop_obtain_topologytuplesortedr   r   r   r   r   )master_addresscluster_defquery_topologytpu_core_countr   device_dictlogical_devicesdretry_countsessmsgr	   r5   r   core_idsnum_cores_per_host_setr   r6   metadatas                      r    _query_tpu_system_metadatarc   D   sg    .'''-+ 113O
 () ,,[-E-Eaff-M-.]]AqB )G )
 K
llM!# YY[##% 	""4/   $('')G	.  f++FKK8D5 $))##D$5$56n	  '2'9'9';<8X<>
!"a'w) ) 3668(@@FgA'( (  <H &i01'K 1( LL$%LL((*<*<=LL*H,>,>?LL3//1"" 7ll-v67 
/ LL)84	/k)  	 	 	 ))  1   ,&
//$
$
//-{L
I

+3
   H 	=sV   
AL-"L/ &L"5LL"L/ NL	L"	L/ "L+'L/ /A#NNc                    	 t        j                  d|        t        j                         j	                         5  t        t        |      }t        j                  | |      5 }|j                  t        j                               }|cddd       cddd       S # 1 sw Y   nxY w	 ddd       y# 1 sw Y   yxY w# t        j                  $ r t        d| z        w xY w)zObtains TPU fabric topology.zfInitializing TPU system (master: %s) to fetch topology for model parallelism. This might take a while.r   NzaFail to initialize TPU system with master (%s). Please double check the TPU system is functional.)r@   rA   r   rB   rC   rE   !_INITIAL_TPU_SYSTEM_TIMEOUT_IN_MSr=   rD   runr   initialize_systemr
   rH   rK   )rV   rW   session_configr^   r   s        r    rS   rS      s    LL C! 
			! 6
+[:n
1 4888C1134       
	%	% 
	<	 sF   8B< (B0"%B	B0	B< B#	B0'B< 0B95B< 9B< <"Cc                 4    t        j                  | |      }|S )z>Returns a session given a timeout and a cluster configuration.)operation_timeout_in_msrW   )r   ConfigProto)timeout_in_secsrW   config_protos      r    rE   rE      s    ''-;H,	r!   c                 t   | t         v ry|r|j                  st        S t        d |j                  D              }t        |v rt	        d      t        |      dk(  r|j                  d   j                  S t        |      dk(  r-t        |v r%|j                  t               |j                         S t	        d      )a  Returns the canonical job name to use to place TPU computations on.

  Args:
    master: A `string` representing the TensorFlow master to use.
    cluster_def: A ClusterDef object describing the TPU cluster.

  Returns:
    A string containing the job name, or None if no job should be specified.

  Raises:
    ValueError: If the user needs to specify a tpu_job_name, because we are
      unable to infer the job name automatically, or if the user-specified job
      names are inappropriate.
  Nc              3   4   K   | ]  }|j                     y wr   )r/   ).0r0   s     r    	<genexpr>zmaster_job.<locals>.<genexpr>   s     6s#((6s   z1Currently, tpu_worker is not an allowed job name.r)   r      zCould not infer TPU job name.)
_LOCAL_MASTERSr0   _DEFAULT_JOB_NAMErM   rK   rO   r/   _DEFAULT_COORDINATOR_JOB_NAMEremoverR   )masterrW   	job_namess      r    
master_jobry      s    " ~
[__6koo66))#
H
II^q??1"""^q$	145]]_233r!   )NF)$r%   r8   tensorflow.core.protobufr   tensorflow.python.clientr   r=   tensorflow.python.distributer   tensorflow.python.eagerr   tensorflow.python.frameworkr   r	   r,   r
   r   tensorflow.python.platformr   r@   tensorflow.python.tpur    tensorflow.python.util.tf_exportr   rF   rI   re   rt   ru   rs   
namedtupler   rc   rS   rE   ry    r!   r    <module>r      s    2  / ; 4 + . ; . + < % 6 - $. !   -  /01K. 1 1 116 <@.3`F(!4r!   