
    BVh4                         d Z ddlZddlmZ ddlmZ ddlmZ	 ddl
mZ ddl
mZ ddl
mZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ i ZdZ ej8                  ddd      Zd Zd Zd Z y)z>TPU specific APIs to be used in conjunction with TPU Strategy.    N)
config_pb2)session)cluster_resolver)context)def_function)
monitoring)device)errors)ops)
tf_logging)topology)tpu)compat) localz/tensorflow/tpu/worker_addressz;The worker address that the coordinator/client connects to.addressc           	      	   |&t        |t        j                        rt        |d      st	        d      t        j                  d       t        j                         j                          t        j                         j                          t        j                          d| ~t        j                         rbt        j                  j                  t        j                         j                         }|j"                  dj%                  |j"                         |d      } t'        | |      sJ t)        j*                  | j,                        }|t.        v rt        j0                  d|       t        j                  d|       |t2        vrdj%                  | j5                               t        j                         rt7        j8                  d	
      fd       }t7        j:                         }|r*t        j0                  d       t7        j<                  d	       	 t?        j                  tA        jB                              5   |       }ddd       t        jD                          	 |t7        j<                  |       	 t        j                         jO                          jQ                         }n/t?        jR                         s| jU                         }	| jW                         }
tY        jZ                  d      }|
r)|j\                  j_                  |
ja                                t?        jb                         je                         5  tg        jh                  ||	      5 }|jk                  tA        jl                               }ddd       ddd       nKt?        j                  tA        jB                              5  tA        jl                  d	      }|cddd       S t        j                  d       to        jp                        }| js                  |       |t.        |<   tt        jw                  d      jy                  | j{                                |S # 1 sw Y   xY w# tF        jH                  $ r(}tG        jJ                  dddtM        |      z         d}~ww xY w# |t7        j<                  |       w w xY w# 1 sw Y   ?xY w# 1 sw Y   xY w# 1 sw Y   xY w)a`  Implementation for tpu.experimental.initialize_tpu_system.

  Kept separate to avoid tpu_oss code duplication.

  Initialize the TPU devices.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster.
    tpu_cluster_resolver_cls: a reference to
        tf.distribute.cluster_resolver.TPUClusterResolver so that an instance
        of it can be initialized if cluster_resolver is None.
  Returns:
    The tf.tpu.Topology object for the topology of the TPU cluster. If called
    inside tf.function, it returns the serialized topology object instead.

  Raises:
    RuntimeError: If running inside a tf.function.
    NotFoundError: If no TPU devices found in eager mode.
    TypeError: If tpu_cluster_resolver_cls is
        not tf.distribute.cluster_resolver.TPUClusterResolver.
  Ntpu_hardware_featureRtpu_cluster_resolver_cls is not tf.distribute.cluster_resolver.TPUClusterResolver.z6Deallocate tpu buffers before initializing tpu system.{}/replica:0/task:0r   z|TPU system %s has already been initialized. Reinitializing the TPU can cause previously created variables on TPU to be lost.zInitializing the TPU system: %sF	autographc                  4    t        j                   dd      S )NF)job compilation_failure_closes_chipstpu_cancellation_closes_chips)r   initialize_systemr   s   W/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/tpu/tpu_strategy_util.py_tpu_init_fnz0initialize_tpu_system_impl.<locals>._tpu_init_fnr   s!     ""+0(-/ /    zIt looks like tf.function behavior was disabled, perhaps using tf.config.run_functions_eagerly. tf.tpu.experimental.initialize_tpu_system requires tf.function to work. This primitive will override the disable.z9TPUs not found in the cluster. Failed in initialization: Tallow_soft_placementconfigtarget)r   r   z!Finished initializing TPU system.)
serializedr   )>
issubclasscluster_resolver_libClusterResolverhasattr	TypeErrorlogginginfor   _clear_cachesclear_kernel_cachegccollectexecuting_eagerlyr	   
DeviceSpecfrom_stringdevice_namer   format
isinstancer   as_text_tpu_INITIALIZED_TPU_SYSTEMSwarning_LOCAL_MASTERSget_job_namer   functionfunctions_run_eagerlyrun_functions_eagerlyr   r   _tpu_system_device_name
async_waitr
   InvalidArgumentErrorNotFoundErrorstr_initialize_logical_devicesnumpy#executing_eagerly_outside_functionsmastercluster_specr   ConfigProtocluster_defCopyFromas_cluster_defGraph
as_defaultsession_libSessionrunr   r   Topologyset_tpu_topology_tpu_worker_addressget_cellset
get_master)r   tpu_cluster_resolver_clscurr_devicetpu_namer    run_eagerlyoutputeserialized_topologyrJ   rK   session_configsesstpu_topologyr   s                 @r   initialize_tpu_system_implre   +   s
   2 %Z 4 D D.+-CD
	>? ? 
,,GH	//!!#	//&&(**,#   "%%11'//2C2O2OPk		$#**;??;/3	$&>	??	?^^,112())OO	'(02
 
,,0(; ^#  
&
&'7'D'D'F
GC U+/ ,/ 446Koo= ((/8::c11#67    
	 **;7OO113 ,,.224$$&F#002L++FN  )),*E*E*GH				! @nVD @"hhs'<'<'>?@@ @ 
C//4	5 !11E;
 !! ! 
,,23"".AB,##$78'38$ y)--.>.I.I.KL	]    &&   

EF  
	 **;7 
!@ @@ @! !sl   (Q- Q Q- S'$SSS! Q*%Q- -R( #R##R((R+ +SS	SS!S+c                  *    t         j                         S )zReturns all currently initialized tpu systems.

  Returns:
     A dictionary, with tpu name as the key and the tpu topology as the value.
  )r;   copy r!   r   get_initialized_tpu_systemsri      s     
"	&	&	((r!   c                 l  
 |&t        |t        j                        rt        |d      st	        d      d
| ~t        j                         rbt        j                  j                  t        j
                         j                        }|j                  dj                  |j                        
 |d      } t        | |      sJ t        j                  | j                         }|t"        vrt%        j&                  d|z         t%        j(                  d|       t        j                         r0|t*        vrdj                  | j-                               
t/        j0                  d	      
fd
       }t/        j2                         }|r*t%        j&                  d       t/        j4                  d       	 t7        j                  t9        j:                  
            5   |        ddd       |t/        j4                  |       	 t%        j(                  d       t        j
                         j=                          t        j
                         j?                          nt7        j@                         s| jC                         }| jE                         }tG        jH                  d      }|r)|jJ                  jM                  |jO                                t7        jP                         jS                         5  tU        jV                  ||      5 }	|	jY                  t9        jZ                                ddd       ddd       nt]        d      t%        j(                  d       |t"        v rt"        |= yy# 1 sw Y   xY w# |t/        j4                  |       w w xY w# 1 sw Y   mxY w# 1 sw Y   exY w)aW  Implementation for tpu.experimental.shutdown_tpu_system.

  Kept separate to avoid tpu_oss code duplication.

  Shuts down the TPU devices.

  This will clear all caches, even those that are maintained through sequential
  calls to tf.tpu.experimental.initialize_tpu_system, such as the compilation
  cache.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster.
    tpu_cluster_resolver_cls: a reference to
        tf.distribute.cluster_resolver.TPUClusterResolver so that an instance
        of it can be initialized if cluster_resolver is None.

  Raises:
    RuntimeError: If no TPU devices found for eager execution or if run in a
        tf.function.
    TypeError: If tpu_cluster_resolver_cls is
        not tf.distribute.cluster_resolver.TPUClusterResolver.
  Nr   r   r   r   zDYou are shutting down a TPU system %s that has not been initialized.z Shutting down the TPU system: %sFr   c                  2    t        j                          y )Nr   )r   shutdown_systemr   s   r   _tpu_shutdown_fnz2shutdown_tpu_system_impl.<locals>._tpu_shutdown_fn  s    	c"r!   zIt looks like tf.function behavior was disabled, perhaps using tf.config.run_functions_eagerly. tf.tpu.experimental.shutdown_tpu_system requires tf.function to work. This primitive will override the disable.zClearing out eager cachesTr"   r$   zinitialize_tpu_system is not supported within tf.functions.  You should call initialize_tpu_system outside of your tf.function. z"Finished shutting down TPU system.)/r(   r)   r*   r+   r,   r   r3   r	   r4   r5   r6   r   r7   r8   r   r9   r:   r;   r-   r<   r.   r=   r>   r   r?   r@   rA   r   r   rB   r/   r0   rI   rJ   rK   r   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rl   RuntimeError)r   r[   r\   r]   rm   r^   rJ   rK   rb   rc   r   s             @r   shutdown_tpu_system_implro      s   4 %Z 4 D D.+-CD
	>? ? 	#   "%%11'//2C2O2OPk		$#**;??;/3	$&>	??	?^^,112(--OO #%-. / 
,,18<  ~% "(()9)F)F)HIcU+# ,# 446Koo= ((/8::c11#67  
	 **;7 LL,-OO##%OO((*224$$&F#002L++FN  )),*E*E*GH				! (nVD ($$&'(( ( 	] 
 
,,34)) * *9  
	 **;7 
! ( (( (sH   5(N M4%N 6N*$N2N*4M>9N NN'	#N**N3)!__doc__r1   tensorflow.core.protobufr   tensorflow.python.clientr   rR   -tensorflow.python.distribute.cluster_resolverr   r)   tensorflow.python.eagerr   r   r   tensorflow.python.frameworkr	   r
   r   tensorflow.python.platformr   r-   tensorflow.python.tpur   r   tensorflow.python.utilr   r;   r=   StringGaugerW   re   ri   ro   rh   r!   r   <module>rz      sr    E 	 / ; b + 0 . . . + < * % )   -j,,$A9N 
M`)m+r!   