
"""Library of TPU helper functions."""

import collections
import enum
from typing import Any, Callable, Iterable, List, Optional, Text, Tuple, Union

from absl import logging
import numpy as np

from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.protobuf.tpu import dynamic_padding_pb2 as dynamic_padding
from tensorflow.core.protobuf.tpu import (
    tpu_embedding_configuration_pb2 as embedding_pb2)
from tensorflow.python import tf2
from tensorflow.python.compiler.xla import xla
from tensorflow.python.framework import auto_control_deps
from tensorflow.python.framework import composite_tensor
from tensorflow.python.framework import config
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import func_graph
from tensorflow.python.framework import function
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import array_ops_stack
from tensorflow.python.ops import cond
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.tpu import device_assignment as device_assignment_lib
from tensorflow.python.tpu import tensor_tracer
from tensorflow.python.tpu import tpu_feed
from tensorflow.python.tpu import tpu_function
from tensorflow.python.tpu import tpu_name_util
from tensorflow.python.tpu import tpu_replication
from tensorflow.python.tpu.ops import tpu_ops
from tensorflow.python.types import core as core_types
from tensorflow.python.util import compat
from tensorflow.python.util import nest
from tensorflow.python.util import object_identity
from tensorflow.python.util import traceback_utils
from tensorflow.python.util import variable_utils
from tensorflow.python.util.tf_export import tf_export

# Ops which can be safely pruned from an XLA compile if they have no consumers.
# These ops should also have no inputs.
_UNCONNECTED_OPS_TO_PRUNE = set(["Placeholder", "VarHandleOp"])

_POST_DEVICE_REWRITE_ATTR = "_post_device_rewrite"
_TPU_COMPILATION_STATUS_ATTR = "_tpu_compilation_status"
_PIVOT_FOR_CLUSTER = "_pivot_for_cluster"

core = tpu_name_util.core


def _tpu_system_device_name(job: Optional[Text]) -> Text:
  """Returns the device name for the TPU_SYSTEM device of `job`."""
  if job is None:
    return "/device:TPU_SYSTEM:0"
  else:
    return "/job:%s/device:TPU_SYSTEM:0" % job


@tf_export(v1=["tpu.initialize_system"])
def initialize_system(
    embedding_config: Optional[embedding_pb2.TPUEmbeddingConfiguration] = None,
    job: Optional[Text] = None,
    compilation_failure_closes_chips: bool = True,
    tpu_cancellation_closes_chips: Optional[bool] = None,
) -> core_types.Tensor:
  """Initializes a distributed TPU system for use with TensorFlow.

  Args:
    embedding_config: If not None, a `TPUEmbeddingConfiguration` proto
      describing the desired configuration of the hardware embedding lookup
      tables. If embedding_config is None, no hardware embeddings can be used.
    job: The job (the XXX in TensorFlow device specification /job:XXX) that
      contains the TPU devices that will be initialized. If job=None it is
      assumed there is only one job in the TensorFlow flock, and an error will
      be returned if this assumption does not hold.
    compilation_failure_closes_chips: Set the configuration whether
      we want to close TPU chips when there is a compilation failure.
    tpu_cancellation_closes_chips: Set the configuration whether
      we want to close TPU chips when a TPU execution is cancelled. If the value
      is None, the behavior will be determined by the command line flag
      `tpu_cancellation_closes_chips` for the TPU worker. WARNING: this argument
      only applies to TFRT TPU runtime.
  Returns:
    A serialized `TopologyProto` that describes the TPU system. Note:
      the topology must be evaluated using `Session.run` before it can be used.
  """
  config_string = ("" if embedding_config is None else
                   embedding_config.SerializeToString())

  # Encode the three-state `tpu_cancellation_closes_chips` flag as the enum
  # value expected by the op: 0 = unset, 1 = enabled, 2 = disabled.
  tpu_cancellation_closes_chips_enum = 0
  if tpu_cancellation_closes_chips is not None:
    if tpu_cancellation_closes_chips:
      tpu_cancellation_closes_chips_enum = 1
    else:
      tpu_cancellation_closes_chips_enum = 2

  with ops.device(_tpu_system_device_name(job)):
    topology = tpu_ops.configure_distributed_tpu(
        compilation_failure_closes_chips=compilation_failure_closes_chips,
        tpu_cancellation_closes_chips=tpu_cancellation_closes_chips_enum,
    )

    if embedding_config is None:
      return topology

    # This function is expected to return an op which yields the topology when
    # executed, but the embedding initialization op must run between
    # initializing the TPU and that topology op, so explicit control
    # dependencies are added in both directions.
    with ops.control_dependencies([topology]):
      embedding_init = tpu_ops.configure_tpu_embedding(config=config_string)
    with ops.control_dependencies([embedding_init]):
      return array_ops.identity(topology, name="tpu_init_identity")


def initialize_system_for_tpu_embedding(
    embedding_config: embedding_pb2.TPUEmbeddingConfiguration,
    job: Optional[Text] = None,
) -> ops.Operation:
  """Initializes a distributed TPU Embedding system for use with TensorFlow.

  The following two are equivalent:
  1. initialize_system() with embedding_config.
  2. initialize_system() without embedding_config, then
     initialize_system_for_tpu_embedding().
  initialize_system() should not be called with embedding_config if
  initialize_system_for_tpu_embedding() is meant to be called later.

  Args:
    embedding_config: a `TPUEmbeddingConfiguration` proto describing the desired
      configuration of the hardware embedding lookup tables.
    job: The job (the XXX in TensorFlow device specification /job:XXX) that
      contains the TPU devices that will be initialized. If job=None it is
      assumed there is only one job in the TensorFlow flock, and an error will
      be returned if this assumption does not hold.

  Returns:
    A no-op.
  """
  config_string = embedding_config.SerializeToString()
  with ops.device(_tpu_system_device_name(job)):
    return tpu_ops.configure_tpu_embedding(config=config_string)


@tf_export(v1=["tpu.shutdown_system"])
def shutdown_system(job: Optional[Text] = None) -> ops.Operation:
  """Shuts down a running distributed TPU system.

  Args:
    job: The job (the XXX in TensorFlow device specification /job:XXX) that
      contains the TPU devices that will be shutdown. If job=None it is
      assumed there is only one job in the TensorFlow flock, and an error will
      be returned if this assumption does not hold.
  """
  with ops.device(_tpu_system_device_name(job)):
    shutdown_distributed_tpu = tpu_ops.shutdown_distributed_tpu()
  return shutdown_distributed_tpu


@auto_control_deps.register_acd_resource_resolver
def tpu_replicated_input_resolver(
    op: ops.Operation,
    resource_reads: object_identity.ObjectIdentitySet,
    resource_writes: object_identity.ObjectIdentitySet) -> bool:
  """Replaces TPUReplicatedInput outputs with its inputs in resource_inputs."""
  # Ignore TPUReplicatedInput for ACD purposes since the dependencies are added
  # directly to the TPUExecute op.
  if op.type == "TPUReplicatedInput":
    if resource_reads or resource_writes:
      resource_reads.clear()
      resource_writes.clear()
      return True
    else:
      return False

  # Replace tensors in `resource_inputs` that are outputs of TPUReplicatedInput
  # with the (unreplicated) inputs of that op.
  def replace_with_unreplicated_resources(resource_inputs):
    """Replaces handles in `resource_inputs` with their unreplicated inputs."""
    to_remove = []
    to_add = []
    for resource in resource_inputs:
      if resource.op.type == "TPUReplicatedInput":
        to_remove.append(resource)
        to_add.extend(resource.op.inputs)
    for t in to_remove:
      resource_inputs.discard(t)
    resource_inputs.update(to_add)
    return to_add or to_remove

  return bool(replace_with_unreplicated_resources(resource_reads) or
              replace_with_unreplicated_resources(resource_writes))


@tf_export(v1=["tpu.PaddingSpec"])
class PaddingSpec(enum.IntEnum):
  """Represents the type of padding policies for tpu.replicate."""
  # By default the policy is AUTO: a dynamic input shape dimension is padded
  # to the maximum of that dimension over all the replicas.
  AUTO = 0
  # Bucketize the dynamic input shape dimension into a power of 2.
  POWER_OF_TWO = 1


@tf_export("tpu.XLAOptions")
class XLAOptions(
    collections.namedtuple("XLAOptions", [
        "use_spmd_for_xla_partitioning",
        "enable_xla_dynamic_padder",
    ])):
  """XLA compilation options.

  Attributes:
    use_spmd_for_xla_partitioning: Boolean. Whether to use XLA's SPMD
      partitioner instead of MPMD partitioner when compiler partitioning is
      requested.
    enable_xla_dynamic_padder: Boolean. Whether to enable XLA dynamic padder
      infrastructure to handle dynamic shapes inputs inside XLA. True by
      default. Disabling this may cause correctness issues with dynamic shapes
      inputs, as XLA will just assume the inputs are with padded shapes. However
      users can optionally set it to False to improve device time if masking is
      already handled in the user side.
  """

  def __new__(cls,
              use_spmd_for_xla_partitioning=True,
              enable_xla_dynamic_padder=True):
    return super(XLAOptions, cls).__new__(cls, use_spmd_for_xla_partitioning,
                                          enable_xla_dynamic_padder)


@tf_export(v1=["tpu.replicate"])
@traceback_utils.filter_traceback
def replicate(
    computation: Callable[..., Any],
    inputs: Optional[List[List[core_types.Tensor]]] = None,
    infeed_queue: Optional[tpu_feed.InfeedQueue] = None,
    device_assignment: Optional[device_assignment_lib.DeviceAssignment] = None,
    name: Optional[Text] = None,
    maximum_shapes: Optional[Any] = None,
    padding_spec: Optional[PaddingSpec] = None,
    xla_options: Optional[XLAOptions] = None) -> List[Any]:
  """Builds a graph operator that runs a replicated TPU computation.

  Example for the basic usage that `inputs` has static shape:

  ```python

  def computation(x):
    x = x + 1
    return tf.math.reduce_mean(x)

  x = tf.convert_to_tensor([1., 2., 3.])
  y = tf.convert_to_tensor([4., 5., 6.])
  tf.compat.v1.tpu.replicate(computation, inputs=[[x], [y]])
  ```

  If the `inputs` have dynamic shapes and you would like to automatically
  bucketize the inputs to avoid XLA recompilation, see the advanced example
  below:

  ```python

  def computation(x):
    x = x + 1
    return tf.math.reduce_mean(x)

  # Assume input tensors in two replicas `x` and `y` both have dynamic shape
  # ([None, 2]).
  tf.compat.v1.tpu.replicate(
    computation,
    inputs=[x, y],
    maximum_shapes=[tf.TensorShape([None, None])],
    padding_spec=tf.compat.v1.tpu.PaddingSpec.POWER_OF_TWO)
  ```

  Args:
    computation: A Python function that builds the computation to replicate.
    inputs: A list of lists of input tensors or `None` (equivalent to
      `[[]]`), indexed by `[replica_num][input_num]`. All replicas must
      have the same number of inputs. Each input can be a nested structure
      containing values that are convertible to tensors. Note that passing an
      N-dimension list of compatible values will result in an N-dimension list of
      scalar tensors rather than a single rank-N tensor. If you need different
      behavior, convert part of inputs to tensors with `tf.convert_to_tensor`.
    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
      of arguments as inputs to computation.
    device_assignment: If not `None`, a `DeviceAssignment` describing the
      mapping between logical cores in the computation with physical cores in
      the TPU topology. Uses a default device assignment if `None`. The
      `DeviceAssignment` may be omitted if each replica of the computation uses
      only one core, and there is either only one replica, or the number of
      replicas is equal to the number of cores in the TPU system.
    name: (Deprecated) Does nothing.
    maximum_shapes: A nested structure of tf.TensorShape representing the shape
      to which the respective component of each input element in each replica
      should be padded. Any unknown dimensions (e.g.
      tf.compat.v1.Dimension(None) in a tf.TensorShape or -1 in a tensor-like
      object) will be padded to the maximum size of that dimension over all
      replicas. The structure of `maximum_shapes` needs to be the same as
      `inputs[0]`.
    padding_spec: An enum specified by `tpu.PaddingSpec`. This describes the
      padding policy when the `inputs` to `tpu.replicate` is dynamic.
      One usage is to enable automatic bucketizing on the inputs by setting the
      value to `tpu.PaddingSpec.POWER_OF_TWO`, which can help to reduce the
      recompilation in the XLA side.
    xla_options: An instance of `tpu.XLAOptions` which indicates the options
      passed to XLA compiler. Use `None` for default options.
  Returns:
    A list of outputs, indexed by `[replica_num]` each output can be a nested
    structure same as what computation() returns with a few exceptions.

    Exceptions include:
      1) None output: a NoOp would be returned which control-depends on
         computation.
      2) Single value output: A tuple containing the value would be returned.
      3) Operation-only outputs: a NoOp would be returned which
         control-depends on computation.
      TODO(b/121383831): Investigate into removing these special cases.

  Raises:
    ValueError: If all replicas do not have equal numbers of input tensors.
    ValueError: If the number of inputs per replica does not match
      the number of formal parameters to `computation`.
    ValueError: If the static `inputs` dimensions don't match with the values
      given in `maximum_shapes`.
    ValueError: If the structure of inputs per replica does not match
      the structure of `maximum_shapes`.
  """
  return split_compile_and_replicate(
      computation,
      inputs,
      infeed_queue,
      device_assignment,
      name,
      maximum_shapes=maximum_shapes,
      padding_spec=padding_spec,
      xla_options=xla_options)[1]


def _ceil_to_pow_of_n(x, n):
  """Ceil input `x` to power of `n`."""
  x = math_ops.cast(x, dtypes.float32)
  lognx = math_ops.log(x) / math_ops.log(n * 1.0)
  lognx = math_ops.ceil(lognx)
  result = math_ops.pow(n * 1.0, lognx)
  result = math_ops.cast(result, dtypes.int32)
  return result
                  |	dt                     n;t        |	      D ]  \  }
}||||   |
   k7  sd||   |
<    t        |	||         ||<   t        j                        }|j                  j                  t        t        j                  d             ||   j                  |        ' g }|D ];  }|j                  t        j                   t#        j$                  |      d             = g }g }g }t        |       D ]  \  }}|j                  g        |j                  g        t'        |      dz
  }t        |      D ]G  \  }||   |   }j                         j                         }	||   }t)        ||         r|t        |	      D ]  \  }
}||   |
   s|dk(  r?|dz  }t+        j,                         }||_        |
|_        ||_        |j                  |       ||   j                  t        j4                  ||
   t6        j8                                g t        |j:                        D ]  \  }
}||   |
   rkd}|j<                  t        |j<                  |      }n;t        j>                  ||   |
   |      }|t@        jB                  k(  rtE        |d      }d|||
   z
  g}nddg}j                  |        j                         jG                         r2tI        jH                  t        jJ                  d      fd	fd
      }nt        jL                        }|j                  j                  t        t        j                  d             ||   j                  |       4||   j                         J  t'        |      }tO        |      D ]  }
||
   jQ                  ||
           ||fS )a  Pad all input tensors given padded_shapes.

  The real shape tensors will be concatenated with the padded original inputs.

  Args:
    inputs: The original inputs.
    padded_shapes: A list of padded shapes for each input. If an entry is None,
      no padding is performed.
    padding_spec: An enum specified by `tpu.PaddingSpec`. This describes the
      padding policy when the `inputs` to `tf.tpu.replicate` is dynamic.
      One usage is to enable automatic bucketizing on the inputs by setting the
      value to `tpu.PaddingSpec.POWER_OF_TWO`, which can help to reduce the
      recompilation in the XLA side.

  Returns:
    The padded inputs and a PaddingMap list which maps the padded input
    dimension to the real shape argument index.
  r   F)dtypeTbaxisrA   rB   c                  0    t        j                         S rv   )r   pad)input_tensorpaddingss   r8   <lambda>z _pad_all_input.<locals>.<lambda>  s    immL(; r:   c                       S rv   r7   )r   s   r8   r   z _pad_all_input.<locals>.<lambda>  s    l r:   ))	enumerate	get_shapeas_listr]   np	full_likeri   maxr   shaperV   	_set_attr_POST_DEVICE_REWRITE_ATTRr   	AttrValuer   
reduce_maxr   stacklenanydynamic_padding
PaddingMap	arg_indexshape_indexpadding_arg_indexr   r   r   dimsvaluemaximumrl   rr   r   is_fully_definedr   constantr   ranger^   )r_   r   r   maximum_static_shapesneed_paddinginput_shape_tensorscore_idxinputs_per_coreidxinput_shapeisreal_input_shaper   shapes_per_inputpadded_inputsreal_shapespadding_mapsreal_shape_idxinput_shape_tensorpadded_shapepadding_mapminimum_dynamic_dim_sizemax_dim_sizepaddingpadded_inputnum_replicasr   r   s                              @@r8   _pad_all_inputr   x  sp   2  ,#,V#4 8h&7 8\ **,446k	Q""2&$$[1BLLe4HIk* 	(DAqY!4S9!<<#'La 	( &))>s)C&Ec" #6##
#

"
"T
*, #%%&67%88* .- NO112BC!LNN -+,#,V#4 ?5hr)A-N&7 ;5\.s3H= **,446k"3'l 
\#	L$<k* 
	DDAq#q!1}!n+668k&)k#()k%.<k+!!+.!((03V\\BD
	D l//0 	#DAq#q! ()$ww" *BCl &--nS.A!.D.FHl!9!990qA,);A)>>?G!fG
//'
"'	#* !!#446   &;"$,
 #|X>, 	!!%$$t,	. 	h&&|4h&&|4w;5	?5B ]#, ,a!KN+, 
	$$r:   c                     t        | t        j                        r&t        t	        j
                  | d            }|f|z  S |S )ai  For an input, replaced the input by a tuple if the input is composite.

  If `maybe_composite` is not composite, return the parameter
  `non_composite_output` otherwise return a tuple which consists of the value of
  the parameter `composite_output` the same number of times as there are
  components of the composite tensor.

  This is useful for computing a mask when flattening nested data with
  `expand_composites=True`. For example

  ```python
  nest.flatten(data, expand_composites=True)
  ```

  and

  ```python
  nest.flatten(nest.map(
      data, lambda x: _flatten_and_filter_composite(x, False, True)))
  ```

  will have the same length and second will be True if the tensor in the first
  is derived from a expanding a composite tensor.

  Args:
    maybe_composite: A value to test for being a composite tensor.
    non_composite_output: The value to return when `maybe_composite` is not a
      composite.
    composite_output: the value to fill the output tuple with if
      `maybe_composite` is a composite.

  Returns:
    `non_composite_output` or a tuple with multiple copies of
    `composite_output`.
  Texpand_composites)
isinstancer   CompositeTensorr   r*   flatten)maybe_compositenon_composite_outputcomposite_outputnum_componentss       r8   _flatten_and_filter_compositer     s?    L !1!A!ABoNON//	r:   use_tpuc	                 T  < ~|g gn|}|xs
 t               }i }	|R|j                  j                         |j                  j	                         j                         d}	|j                  |	d<   t        j                         |	d<   t        j                         rt        j                  d       t        |t              st        dt        |             t        d |D              r%t        d|D 
cg c]  }
t        |
       c}
       t!        |      }|d	k(  rg S t#        d
|      D ]  }t%        j&                  |d	   ||            t)        j*                  |      }|D cg c]  }t%        j                  |d       }}t%        j                  t%        j,                  d |d	               }g }|D ]K  }
|j/                  |
D cg c].  }|t1        j2                  d	      nt5        j6                  |      0 c}       M |d	   D cg c]  }|j8                   }}t!        |d	         }t!        |      }t#        |      D ]x  }t!        ||         |k7  r(t;        dj=                  ||t!        ||                     ||   D cg c]  }|j8                   }}||k7  s^t;        dj=                  |||             t?        j@                  | ||      }|o|/t        d| d|d	   D cg c]  }|jB                   c} d|       t        d| d|d	   D cg c]  }|jB                   c} dd|jD                   d|       d}|r|rt;        d      t%        j&                  |d	   |d       t%        j                  tG        t%        j                  |d	         t%        j                  |            D cg c]  \  }}tI        ||       c}}      }|D cg c]  }|tK        jL                  |      nd }}t%        j&                  |d	   |d       |}tO        |||      \  }}|rd}t        j                  d|d	          tQ        | dd      |	d<   |jR                  |	d<   t5        jT                         }g }t#        d	t!        |d	               D ]U  }t#        |      D cg c]
  }||   |    } }|j/                  tW        jX                  | dj=                  |                   W t        |tZ        j\                        r|j_                  d|jB                  z         }!n|j_                  d       }!ta        jb                  |!d!z         }"|"je                  tf        ti        jj                  tm        jn                  |!      "             tq        jr                  |!||"#      }#	 |#ju                          tW        jv                  d1||d$|	}$ty        jz                  |      5  t5        j|                  |$g      5  |r|j~                  rD ]  }%||%j                     j                  }&t        j                  ||%j                     |%j                  ||%j                           ||%j                  <   ||%j                     j                  |&        t        |      D cg c]+  \  }}t        j                  |d%j=                  |            - }}}tG        ||      D ]:  \  }}'|r|'s|j                  je                  d&ti        jj                  d'             < tG        ||d	         D (
cg c]  \  }(}
|
dn|( })}(}
t%        j                  |d	   |)d| d(      })|7|j                  |       |j                         D ]  }*|)j/                  |*        t        j                         }+|+j                  },|+j                  <<fd)}-|+j                  d       |+j                  |-        | |) }.|+j                  |,       |+j                  <       t)        j*                  |.      }.ddd       ddd       |jR                  xr |duxr |j                  d
kD  }/t?        j                  .      }0|0rt        |.|/      \  }1}2}3nt        |.|/      \  }1}2}3t        j                  j                         rdt        j                         rt        j                  d*       n:t        j                         }4|4j                  t5        jT                         |1|2|      }1|#j                  |1       |#j                          |#j                          |#j                         }5|5rRti        jj                         }6|6j                  j                  j                  d+ |5D               |$je                  d,|6       t5        j|                  |$g      5  |r`tW        j                         }7|7j                  }8ti        jj                  tm        jn                  |!      "      }6|8je                  t        |6       nta        jb                  d-      }7ddd       |1s37t#        |      D cg c]  }ta        j                  |2d.|z         c}gS t#        |      D cg c]  }g  }9}t        |1      D ]  \  }}*|*%t#        |      D ]  }|9|   j/                  d        -tW        j                  |*|d/j=                  |            }:t5        j|                  |2      5  t#        |      D ]3  }|9|   j/                  t        j                  |:|   d0||fz               5 	 ddd        |9D ;cg c]  };t%        j                  |3|;d       }9};7|9gS c c}
w c c}w c c}w c c}w c c}w c c}w c c}w c c}}w c c}w c c}w c c}}w c c}
}(w # 1 sw Y   xY w# 1 sw Y   xY w# |#j                          |#j                          |#j                         }5w xY w# 1 sw Y   xY wc c}w c c}w # 1 sw Y   xY wc c};w )2ay  Builds graph operators that runs compilation and replicated computation.

  This is a lower level interface than replicate that returns a separate compile
  and execute output tensor. In the generated graph the compile op feeds into
  the execute op and no additional compilation is incurred when running the
  compile op before the execute op. The compile op returns additional
  information about the compilation but does not return the compiled program.

  Args:
    computation: A Python function that builds the computation to replicate.
    inputs: A list of lists of input tensors or `None` (equivalent to
      `[[]]`), indexed by `[replica_num][input_num]`. All replicas must
      have the same number of inputs. Each input can be a nested structure
      containing values that are convertible to tensors. Note that passing an
      N-dimension list of compatible values will result in a N-dimension list of
      scalar tensors rather than a single Rank-N tensors. If you need different
      behavior, convert part of inputs to tensors with `tf.convert_to_tensor`.
    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
      of arguments as inputs to computation.
    device_assignment: If not `None`, a `DeviceAssignment` describing the
      mapping between logical cores in the computation with physical cores in
      the TPU topology. Uses a default device assignment if `None`. The
      `DeviceAssignment` may be omitted if each replica of the computation uses
      only one core, and there is either only one replica, or the number of
      replicas is equal to the number of cores in the TPU system.
    name: (Deprecated) Does nothing.
    use_tpu: When false, the input `computation` is executed on the XLA CPU/GPU
      backends. Currently, only supports a default placement (computation is
      placed on GPU if one is available, and on CPU if not).
    maximum_shapes: A nested structure of tf.TensorShape representing the shape
      to which the respective component of each input element in each replica
      should be padded. Any unknown dimensions (e.g.
      tf.compat.v1.Dimension(None) in a tf.TensorShape or -1 in a tensor-like
      object) will be padded to the maximum size of that dimension over all
      replicas. The structure of `maximum_shapes` needs to be the same as
      `inputs[0]`.
    padding_spec: An enum specified by `tf.tpu.PaddingSpec`. This describes the
      padding policy when the `inputs` to `tf.tpu.replicate` is dynamic.
      One usage is to enable automatic bucketizing on the inputs by setting the
      value to `tpu.PaddingSpec.POWER_OF_TWO`, which can help to reduce the
      recompilation in the XLA side.
    xla_options: An instance of `tpu.XLAOptions` which indicates the options
      passed to XLA compiler. Use `None` for default options.

  Returns:
    A list of lists with the first list corresponding to the compile op and the
    second a list of output tensors, indexed by `[replica_num][output_num]`.
  Raises:
    ValueError: If all replicas do not have equal numbers of input tensors.
    ValueError: If the number of inputs per replica does not match
      the number of formal parameters to `computation`.
    ValueError: If the static `inputs` dimensions don't match with the values
      given in `maximum_shapes`.
    ValueError: If the structure of inputs per replica does not match
      the structure of `maximum_shapes`.
  N)rN   r!   num_cores_per_replicaallow_soft_placementzfAutomatic outside compilation is enabled. Ops without XLA kernels will be automatically placed on CPU.z@tpu.replicate() inputs must be a list of lists/tuples, received c              3   J   K   | ]  }t        |t        t        f         y wrv   )r   listtuple).0inps     r8   	<genexpr>z.split_compile_and_replicate.<locals>.<genexpr>  s     >ZdE]+	+>s   !#zGtpu.replicate() inputs must be a list of lists/tuples, received types: r   rA   Tr   c                     t        | dd      S )NFT)r   r   s    r8   r   z-split_compile_and_replicate.<locals>.<lambda>  s    -a= r:   z`Replicas must have the same number of inputs. Replica 0 had {} inputs, replica {} had {} inputs.zdReplicas must have matching input types. Replica 0 had input types {}, replica {} had input types {}zOSupplied computation cannot be called with the specified inputs. You specified z	 inputs: z, but the computation needs  zand z: additional inputs from infeed, but the computation needs Fz9Dynamic input shapes are not supported with infeed queues)check_typesz&TPU has inputs with dynamic shapes: %sstep_marker_locationSTEP_MARK_AT_ENTRYrz   zinput{}rD   cluster_clusterz/pivot)r   )rE   r   pivot)r   r   zreplicated_input_{}_tpu_input_identityr   )	structureflat_sequencer   c                     |j                  dd      }|d|d<   t        j                  d||        | |g|i |S  | |g|i |S )z)Variables on TPU have a few restrictions.partitionerNz~Partitioned variables are not supported on TPU. Got `partitioner` that is %s for variable %s. Setting `partitioner` to `None`.)getr   warning)getterrE   argskwargsr   saved_custom_getters        r8   custom_getterz2split_compile_and_replicate.<locals>.custom_getterN  sn    jj5""&&

//12=tE &.t.v.
.$VTCDCFC
Cr:   z<TF API ver >= 2.0 detected. Tensor Tracer v1 is not enabled.c              3   F   K   | ]  }t        j                  |        y wrv   )r)   as_bytesr   r   s     r8   r   z.split_compile_and_replicate.<locals>.<genexpr>  s     KAV__Q/Ks   !host_compute_corecompilation_statuszshard_%dzoutput{}zoutput_%d_shard_%dr7   )hrt   rN   
serializedcore_assignmentr   tolistr   r   get_soft_device_placementr   infor   r   	TypeErrorr\   r   r   r   r*   assert_same_structurer-   convert_variables_to_tensorsmap_structurer]   r   r   r   convert_to_tensorr   
ValueErrorformatr   check_function_argument_countrE   number_of_tuple_elementszipr   r   TensorShaper   getattrrz   get_default_graphr'   tpu_replicated_inputr   	FuncGraphunique_namer   no_opr   _PIVOT_FOR_CLUSTERr   r   r)   r   r&   TPUReplicateContextEntertpu_replicate_metadatar$   tpu_shard_contextrI   r{   r   r   tf2xlaset_dynamic_dimension_sizer   r   	set_shaper   r   rK   rV   pack_sequence_asset_number_of_shardsgenerate_dequeue_opr    get_variable_scopeuse_resourcer   set_use_resourceset_custom_getteris_flat_postprocess_flat_outputs_postprocess_non_flat_outputsr"   TensorTracer
is_enabledr   enabledwarn	trace_tpu
ExitResultreport_unsupported_operationsExitHostComputeCorer   r^   tpu_compilation_result_TPU_COMPILATION_STATUS_ATTRgrouptpu_replicated_output)=r   r_   r   r!   rE   r   r   r   r   metadata_kwargsr   r   r   per_replica_inputflat_inputs_with_nonesis_compositeflat_inputsr   flat_input_typesinput_arityflat_input_aritytypes	arg_errordynamic_shape_inputsyflat_maximum_shapesr   unpadded_inputsr   graphflat_replicated_inputsreplicareplicascluster_namer   contextmetadatar   r   	composite
replicatedcomputation_inputsrf   vscopesaved_use_resourcer   outputsneed_spmd_partitioningoutputs_is_flatoutput_tensorscontrol_depspack_templatettr   
attr_valuecompile_statusrV   replicated_outputsysreplica_outsr   s=                                                               @r8   r   r   '  s   F >B4v&+z|+/"
 &&113--557>>@	O 	// +, -3,L,L,N/()%%'LL " # 
FD	!
   $V~/ 0 0>v>>
	178#DI89	;< < V, QI L! 5avay&)45 66v>&
  &
 ll$=  d00=vayJ K,
 +# c $%9Q#2G2G2JJ  (31~6!agg66F1I+)* 
8a
6!9~$ !!'QF1I!GI I *!n-QWW-E-   GGMv+QH78 8
8 //;.)&-y&)1LQ!&&1L0M N''0k34 4
 &-y&)1LQ!&&1L0MQP667 844=;@A A 
EG G 	vay.eL ,,fQi0n57	8Q 
'q!	, 	89 % ()}  #$>  	{1~/B+02 "O ./B/;!=K !ll;VAYG,3)+?-A/() // 12 


!% CA'( 1a7<\7JKGG$Q'KHK!!$$9++A.	011
 z++, $$Z%**%<=L$$Y/L

 
 lX&=
>%//$ **V__\-JKM//l%A'v2MMO-- G!7G6EGH 
	'	'
 TE//
;TE 
+"G"G' 	OK.{/D/DEKK+'-'H'H()>)>?))()F)FG(I !##%
 !!6!6
7
A
A+
N	O   67 a 

Q%:%A%A!%D
E    4lC 	;,!Y $y
$$...'11D9;	; FI$&<Q&?FA2A*c#+$:
-   001I*+<,<= " 
	!)),7113 	'A

#
#A
&	' 002f!.."00D d#}-/0g0123;;GDgiTE TEn 	11 	4%	4//!3  kk'*O4M
)5+1nlM (1G
H 2nlM !!,,.	 8 	9 '')c&;&;&=&4l&24 ~&))+LLN//1))+JOOK9JKK*J7

+ I557nb!++fool.KLjll/<'--3GHnI 
 	 <(	
 ""<j1nE	
  %*,$78q88' Hda 	y<( 1'7#**401 
	&	&	<j//2
4B
 
	!	!,	/ H<( H'7#**7"6!W"EG	HHH H#H2 -
 M<4P 
 ,	--c	 9  7 . 2M
 2M.	82 LR "ETE TE TE TE\ ))+LLN//1I I	
 9$H Hs   1m3
$m83m=

nn)nn9n
 n$n!%=o "n?9B#n20n&
n2&An2*n,
:C)n2#n?+C6o ;A9p!p>	p=App%&n22n<	7n??o	o 2o>pp"	rO  rP  c                    | 
t               } t        j                  | d      }t        j                  | d      } | t        j                         fz  } d }	 |r2| D cg c]&  }t        |t        j                        r|n ||      ( } }nXt        j                  t        d            5  | D cg c]&  }t        |t        j                        r|n ||      ( } }ddd       | D cg c]  }t        |t        j                        s|! }}| D cg c]  }t        |t        j                        r|! }}| ||z   k7  rt        d      t        |      d	kD  r|dd	t        |      z
   }g }|D ]  }	|	|j                  d       |rWt        j                  |	      }|j                   j#                  d
t%        j&                  d             |j                  |       qt        j                  |	j                  r|	j                  n
t        d            5  t        j                  |	      }|j                   j#                  d
t%        j&                  d             |j                  |       ddd        |||fS c c}w c c}w # 1 sw Y   xY w# t        $ r}t        d|       d}~ww xY wc c}w c c}w # 1 sw Y   XxY w)a  Validates non-flat outputs, add backs device assignments and other attrs.

  Args:
    outputs: Output from `computation` inside `tpu.rewrite`.
    need_spmd_partitioning: Whether XLA SPMD partitioning is needed.

  Returns:
    - Tensors extracted from outputs.
    - Operations extracted from outputs.
    - A pack template for use with nest.pack_sequence_as to pack the tensors.
  NFr   Tc                 4    | d S t        j                  |       S rv   )r   r	  r   s    r8   r   z+_postprocess_flat_outputs.<locals>.<lambda>  s    AID 33H3H3K r:   r   z_TPU function return values must all either be Operations or convertible to Tensors. Got error: zYTPU functions must return zero-or more Tensor values followed by zero or more Operations.rA   _tpu_output_identityr   )r   r*   r   r   r  r   r   	OperationrG   r(   	Exceptionr
  r   r]   r   rK   rV   r   r   r   )
rO  rP  rT  maybe_convertoeoutput_operationsrR  new_output_tensorsrf   s
             r8   r&  r&    s   , _gG ,,w%@-
 LLD9' 
$$&(('K-3  !S]]+!q1A
Ag 
 ::d1g 
 
 As}}-A=3CC
 

 #*JQZ3==-IqJJ&K!jCMM.JAK.K!222
	#$ $ 		a!"=1s+<'=#=>M  %ay%	


Q
addnn+^-E-E-MN"::!((ahhQ8 %q!	-~/G/G$/OP!!!$% %%  
.	==g

 

 
 3
	../S	23 33 KK8% %sy   J +I;!J )J.+J JJ 'J1J1J61J6AJ;;J  JJ
J 	J.J))J.;K	c                 X   t        j                  | d      }t        |      D ]S  \  }}|d||<   t        |t        j
                        rt        d|j                   d      	 t	        j                  |      }|r^t        j                  |      }|j                  j                  dt        j                  d             t        j                  |      ||<   t	        j                   |j                   r|j                   n
t#        d	            5  t        j                  |      }|j                  j                  dt        j                  d             t        j                  |      ||<   ddd       V |g | fS # t        $ r}t        d| d      d}~ww xY w# 1 sw Y   xY w)
a  Validates non-flat outputs, add backs device assignments and other attrs.

  Args:
    outputs: Output from `computation` inside `tpu.rewrite`.
    need_spmd_partitioning: Whether XLA SPMD partitioning is needed.

  Returns:
    - Tensors extracted from outputs.
    - An empty Operations list because Operations are not allowed in non-flat
      outputs.
    - A pack template for use with nest.pack_sequence_as to pack the tensors.
  Tr   Nztpu.rewrite does not support Operation as return value in non-flat output structure. You can set returned Operations as control dependencies of returned Tensors so Operations are triggered when Tensors are evaluated. Operation found: ""z`TPU function return values must all either be Operations or convertible to Tensors. Got error: "r]  r   r   )r*   r   r   r   r   r^  r
  rE   r	  r_  r   rK   rV   r   r   r   rG   r(   )rO  rP  flat_outputsr   ra  rb  s         r8   r'  r'    s   $ g>, % $0dayl1o!S]]#6 78ffXQ@A A7



"a 


Q
addnn+^-E-E-MN!**1-l1o::!((ahhQ8 0q!	-~/G/G$/OP#,,Q/Q0 0?$0N 
r7	""3  7112167 77"0 0s%   &E?AF?	FFFF)	
num_shardsinput_shard_axesoutputs_from_all_shardsoutput_shard_axesc
           	         |dk  rt        d|       |g n|}t        |t              st        dt	        |             |D 
cg c]  }
t        j                  |
       }}
|dgt        |      z  }t        |      t        |      k7  r$t        dt        |       dt        |       d      |rSt        ||      D 
cg c]  \  }}
t        j                  |
||       }}}
t        | D cg c]  }t        |       }}ng g|z  }t        | |||||		      \  }}t        |d   t
        j                        r||d   gfS t        |d         }|dg|z  }|t        |      k7  rt        d
| dt        |       d      t        |t              r|g|z  }|t        |      k7  rt        d| dt        |       d      g }t        ||t        |       D ]  \  }}}
|rt|
d   j                  }|duxr |j                  dk(  }|j!                  |rt#        j$                  t        |
            nt        j&                  t        |
      |             }|j!                  |
d           ||fS c c}
w c c}
}w c c}w )a  Shards `computation` for parallel execution.

  `inputs` must be a list of Tensors or None (equivalent to an empty list), each
  of which has a corresponding split axis (from `input_shard_axes`). Each input
  is split into `num_shards` pieces along the corresponding axis, and
  computation is applied to each shard in parallel.

  Tensors are broadcast to all shards if they are lexically captured by
  `computation`. e.g.,

  x = tf.constant(7)
  def computation():
    return x + 3
  ... = shard(computation, ...)

  If `outputs_from_all_shards` is true, the outputs from all shards of
  `computation` are concatenated back together along their `output_shard_axes`.
  Otherwise, each output is taken from an arbitrary shard.

  Inputs and outputs of the computation must be at least rank-1 Tensors.

  Args:
    computation: A Python function that builds a computation to apply to each
      shard of the input.
    inputs: A list of input tensors or None (equivalent to an empty list). Each
      input tensor has a corresponding shard axes, given by `input_shard_axes`,
      which must have size divisible by `num_shards`.
    num_shards: The number of shards.
    input_shard_axes: A list of dimensions along which to shard `inputs`, or
      `None`. `None` means "shard all inputs along dimension 0". If not `None`,
      there must be one dimension per input.
    outputs_from_all_shards: Boolean or list of boolean. For each output, if
      `True`, outputs from all shards are concatenated along the corresponding
      `output_shard_axes` entry. Otherwise, each output is taken
      from an arbitrary shard. If the argument is a boolean, the argument's
      value is used for each output.
    output_shard_axes: A list of dimensions along which to concatenate the
      outputs of `computation`, or `None`. `None` means "concatenate all outputs
      along dimension 0". If not `None`, there must be one dimension per output.
      Ignored if `outputs_from_all_shards` is False.
    infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs
      of `computation`.
    device_assignment: If not `None`, a `DeviceAssignment` describing the
      mapping between logical cores in the computation with physical cores in
      the TPU topology. Uses a default device assignment if `None`. The
      `DeviceAssignment` may be omitted if each shard of the computation uses
      only one core, and there is either only one shard, or the number of shards
      is equal to the number of cores in the TPU system.
    name: (Deprecated) Does nothing.
    xla_options: An instance of `tpu.XLAOptions` which indicates the options
      passed to XLA compiler. Use `None` for default options.
  Returns:
    A tuple of (compile op, [output tensors]).
  Raises:
    ValueError: If num_shards <= 0
    ValueError: If len(input_shard_axes) != len(inputs)
    ValueError: If len(output_shard_axes) != len(outputs from `computation`)
  r   z0num_shards must be a positive integer. Received NzAtpu.shard()'s inputs must be a list of Tensors or None. Received zKLength of input_shard_axes must be equal to the number of inputs. Received z inputs and z input_shard_axes.r   r   r!   rE   r   zMLength of output_shard_axes must be equal to the number of outputs. Received z outputs and z output_shard_axes.zSLength of outputs_from_all_shards must be equal to the number of outputs. Received z outputs  and z outputs_from_all_shards.)r
  r   r   r  r\   r   r	  r   r  r   splitr   r^  ri   r   ndimsr]   r   r   concat)r   r_   rh  ri  rj  rk  r   r!   rE   r   r   r   split_inputsr   transposed_inputs
compile_oprO  num_outputsresults
all_shardsr   	is_scalars                         r8   split_compile_and_shardrx  \  s   R 1_

::,GI I 2V&	FD	!
   $V~/ 0 0 /55C!!!$5&5sS[([C())
 ,,/K=,-..@B C C 
 -v68T1 	:D18L 8
 +.|*<=Qa==z)3)*g 
CMM* 
|##
 GAJ+ k)C)**
 --8M : 1233FH I I '.67+EC/00
	(M&'
((A	CD D
 '"#46M#&= 2 
tZdjjet#:)9inn	o++DG4$++DG$?B nnQqT
 
W	Y 68
 >s   I- "I2/I8z	tpu.shardc
                 2    t        | |||||||||	
      d   S )a  Shards `computation` for parallel execution.

  `inputs` must be a list of Tensors or None (equivalent to an empty list), each
  of which has a corresponding split axis (from `input_shard_axes`). Each input
  is split into `num_shards` pieces along the corresponding axis, and
  computation is applied to each shard in parallel.

  Tensors are broadcast to all shards if they are lexically captured by
  `computation`. e.g.,

  x = tf.constant(7)
  def computation():
    return x + 3
  ... = shard(computation, ...)

  TODO(phawkins): consider adding support for broadcasting Tensors passed
  as inputs.

  If `outputs_from_all_shards` is true, the outputs from all shards of
  `computation` are concatenated back together along their `output_shard_axes`.
  Otherwise, each output is taken from an arbitrary shard.

  Inputs and outputs of the computation must be at least rank-1 Tensors.

  Args:
    computation: A Python function that builds a computation to apply to each
      shard of the input.
    inputs: A list of input tensors or None (equivalent to an empty list). Each
      input tensor has a corresponding shard axes, given by `input_shard_axes`,
      which must have size divisible by `num_shards`.
    num_shards: The number of shards.
    input_shard_axes: A list of dimensions along which to shard `inputs`, or
      `None`. `None` means "shard all inputs along dimension 0". If not `None`,
      there must be one dimension per input.
    outputs_from_all_shards: Boolean or list of boolean. For each output, if
      `True`, outputs from all shards are concatenated along the corresponding
      `output_shard_axes` entry. Otherwise, each output is taken
      from an arbitrary shard. If the argument is a boolean, the argument's
      value is used for each output.
    output_shard_axes: A list of dimensions along which to concatenate the
      outputs of `computation`, or `None`. `None` means "concatenate all outputs
      along dimension 0". If not `None`, there must be one dimension per output.
      Ignored if `outputs_from_all_shards` is False.
    infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs
      of `computation`.
    device_assignment: If not `None`, a `DeviceAssignment` describing the
      mapping between logical cores in the computation with physical cores in
      the TPU topology. Uses a default device assignment if `None`. The
      `DeviceAssignment` may be omitted if each shard of the computation uses
      only one core, and there is either only one shard, or the number of shards
      is equal to the number of cores in the TPU system.
    name: (Deprecated) Does nothing.
    xla_options: An instance of `tpu.XLAOptions` which indicates the options
      passed to XLA compiler. Use `None` for default options.
  Returns:
    A list of output tensors.
  Raises:
    ValueError: If num_shards <= 0
    ValueError: If len(input_shard_axes) != len(inputs)
    ValueError: If len(output_shard_axes) != len(outputs from `computation`)
  )	r_   rh  ri  rj  rk  r   r!   rE   r   rA   )rx  )
r   r_   rh  ri  rj  rk  r   r!   rE   r   s
             r8   shardrz    s:    T 
!'5))

  !

" 
"r:   ztpu.batch_parallelc           	      &    t        | ||||||      S )a  Shards `computation` along the batch dimension for parallel execution.

  Convenience wrapper around shard().

  `inputs` must be a list of Tensors or None (equivalent to an empty list).
  Each input is split into `num_shards` pieces along the 0-th dimension, and
  computation is applied to each shard in parallel.

  Tensors are broadcast to all shards if they are lexically captured by
  `computation`. e.g.,

  x = tf.constant(7)
  def computation():
    return x + 3
  ... = shard(computation, ...)

  The outputs from all shards are concatenated back together along their 0-th
  dimension.

  Inputs and outputs of the computation must be at least rank-1 Tensors.

  Args:
    computation: A Python function that builds a computation to apply to each
      shard of the input.
    inputs: A list of input tensors or None (equivalent to an empty list). The
      0-th dimension of each Tensor must have size divisible by `num_shards`.
    num_shards: The number of shards.
    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
      of arguments as inputs to `computation`.
    device_assignment: If not `None`, a `DeviceAssignment` describing the
      mapping between logical cores in the computation with physical cores in
      the TPU topology. Uses a default device assignment if `None`. The
      `DeviceAssignment` may be omitted if each shard of the computation uses
      only one core, and there is either only one shard, or the number of shards
      is equal to the number of cores in the TPU system.
    name: (Deprecated) Does nothing.
    xla_options: An instance of `tpu.XLAOptions` which indicates the options
      passed to XLA compiler. Use `None` for default options.
  Returns:
    A list of output tensors.
  Raises:
    ValueError: If `num_shards <= 0`
  )rh  r   r!   rE   r   )rz  )r   r_   rh  r   r!   rE   r   s          r8   batch_parallelr|  U  s'    j 
)
 r:   ztpu.rewritec                 4    t        | |dn|g||||      d   S )a  Rewrites `computation` for execution on a TPU system.

  Args:
    computation: A Python function that builds a computation to apply to the
      input. If the function takes n inputs, 'inputs' should be a list of n
      tensors.

      `computation` may return a list of operations and tensors. Tensors must
      come before operations in the returned list.  The return value of
      `rewrite` is a list of tensors corresponding to the tensors from the
      output of `computation`.

      All `Operation`s constructed during `computation` will be executed when
      evaluating any of the returned output tensors, not just the ones returned.
    inputs: A list of input tensors or `None` (equivalent to an empty list).
      Each input can be a nested structure containing values that are
      convertible to tensors. Note that passing an N-dimension list of
      compatible values will result in a N-dimension list of scalar tensors
      rather than a single Rank-N tensors. If you need different behavior,
      convert part of inputs to tensors with `tf.convert_to_tensor`.
    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
      of arguments as inputs to `computation`.
    device_assignment: if not `None`, a `DeviceAssignment` describing the
      mapping between logical cores in the computation with physical cores in
      the TPU topology. May be omitted for a single-core computation, in which
      case the core attached to task 0, TPU device 0 is used.
    name: (Deprecated) Does nothing.
    xla_options: An instance of `tpu.XLAOptions` which indicates the options
      passed to XLA compiler. Use `None` for default options.
  Returns:
    Same data structure as if computation(*inputs) is called directly with some
    exceptions for correctness. Exceptions include:
      1) None output: a NoOp would be returned which control-depends on
         computation.
      2) Single value output: A tuple containing the value would be returned.
      3) Operation-only outputs: a NoOp would be returned which
         control-depends on computation.
      TODO(b/121383831): Investigate into removing these special cases.
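# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of the original module): rewriting a
# single-core computation for TPU execution. The function name and constant
# inputs are hypothetical; on a real worker this would typically run between
# `initialize_system()` and `shutdown_system()`.
def _example_rewrite():
  def computation(x, y):
    return math_ops.matmul(x, y)

  x = constant_op.constant([[1., 2.], [3., 4.]])
  y = constant_op.constant([[5.], [6.]])
  outputs = rewrite(computation, inputs=[x, y])
  # A single-value result is wrapped in a tuple, as documented above.
  return outputs[0]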
  Nrm  r   )r   )r   r_   r   r!   rE   r   s         r8   rewriter~    s6    d 
nd6()
  !
" "r:   )ReadVariableOpAssignVariableOpAssignAddVariableOpAssignSubVariableOpr0   Variable


def under_tpu_inference_context():
  """Check if it is currently under `_TPUInferenceContext`."""
  graph = ops.get_default_graph()
  while graph:
    # Walk the chain of control flow contexts attached to the current graph.
    context = graph._get_control_flow_context()  # pylint: disable=protected-access
    while context:
      if isinstance(context, _TPUInferenceContext):
        return True
      context = context.outer_context
    # Then walk outwards through enclosing function graphs.
    if isinstance(graph, function._FuncGraph):  # pylint: disable=protected-access
      graph = graph._outer_graph  # pylint: disable=protected-access
    elif isinstance(graph, func_graph.FuncGraph):
      graph = graph.outer_graph
    else:
      return False
  return False


class _TPUInferenceContext(control_flow_ops.XLAControlFlowContext):
  """A `ControlFlowContext` for nodes inside a TPU inference computation.

  The primary role of `_TPUInferenceContext` is to indicate the mode of
  operation and possibly sanity check operators inside a
  tpu.rewrite_for_inference() computation.
  """

  def __init__(self, name: Text, check_ops: bool = True):
    super(_TPUInferenceContext, self).__init__()
    self._name = name
    self._check_ops = check_ops

  def AddOp(self, op):
    self._AddOpInternal(op)

  def _AddOpInternal(self, op):
    # pylint: disable=protected-access
    if self._check_ops and op.type in _DENYLISTED_INFERENCE_OPS:
      raise NotImplementedError(
          f"Operation of type {op.type} ({op.name}) is not supported on the "
          "TPU for inference. Execution will fail if this op is used in the "
          "graph. Make sure your variables are using variable_scope.")
    if self._outer_context:
      self._outer_context.AddInnerOp(op)

  def AddValue(self, val):
    result = val
    if self._outer_context:
      result = self._outer_context.AddValue(val)
    return result

  def AddInnerOp(self, op):
    self._AddOpInternal(op)

  @property
  def grad_state(self):
    return None


def validate_inference_rewrite_for_variables(graph: ops.Graph):
  """Validates whether rewrite_for_inference() 'worked' for variables.

     The rewrite_for_inference() method is supposed to append GuaranteeConstOps
     after ReadVariableOps, but this mechanism works only if you are using
     tf.compat.v1.get_variable() to create and access variables in your tpu
     computation. This validation method can be called immediately after calling
     tpu.rewrite_for_inference() to check whether GuaranteeConstOps were added
     to the graph.

     Typical usages:
       tpu.validate_inference_rewrite_for_variables(
           tf.compat.v1.get_default_graph())

       tpu.validate_inference_rewrite_for_variables(sess.graph)

  Args:
    graph: The graph which needs to be validated.
  Raises:
    RuntimeError: if validation failed.
  """
  if not any(x.type == "GuaranteeConst" for x in graph.get_operations()):
    raise RuntimeError(
        "No GuaranteeConst ops found in the graph after running "
        "tpu.rewrite_for_inference(...). Please check that you are using "
        "tf.get_variable() to create and access variables in your tpu "
        "computation.")


def rewrite_for_inference(
    computation: Callable[..., Any],
    inputs: Optional[List[core_types.Tensor]] = None,
    infeed_queue: Optional[tpu_feed.InfeedQueue] = None,
    device_assignment: Optional[device_assignment_lib.DeviceAssignment] = None,
    name: Optional[Text] = None) -> List[core_types.Tensor]:
  """Rewrites `computation` for inference on a TPU system.

     Other than 'rewriting' the computation to run on a TPU, if using variables
     in your computation, it moves the ReadVariableOps outside the TPU
     computation, and adds GuaranteeConst ops just after the ReadVariableOps.
     This mechanism works only if you are using tf.compat.v1.get_variable() to
     create and access variables in your tpu computation. You can validate
     whether this worked by calling the validate_inference_rewrite_for_variables()
     method immediately after this method to check whether GuaranteeConstOps
     were added to the graph.

  Args:
    computation: A Python function that builds a computation to apply to the
      input. If the function takes n inputs, 'inputs' should be a list of n
      tensors. If the function returns m outputs, rewrite will return a list of
      m tensors.
    inputs: A list of input tensors or `None` (equivalent to an empty list).
    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
      of arguments as inputs to `computation`.
    device_assignment: if not `None`, a `DeviceAssignment` describing the
      mapping between logical cores in the computation with physical cores in
      the TPU topology. May be omitted for a single-core computation, in which
      case the core attached to task 0, TPU device 0 is used.
    name: The name of the operator.
  Returns:
    A list of output tensors.
  """

  def guarantee_const_getter(getter, name, *args, **kwargs):
    with ops.control_dependencies(None):
      return array_ops.guarantee_const(
          getter(name, *args, **kwargs), name=name + "/GuaranteeConst")

  def wrapped_computation(*args, **kwargs):
    """Execute computation under `_TPUInferenceContext`."""
    context = _TPUInferenceContext(
        name=ops.get_default_graph().unique_name("rewrite_for_inference"))
    try:
      context.Enter()

      vscope = variable_scope.get_variable_scope()
      prev_custom_getter = vscope.custom_getter
      prev_caching_device = vscope.caching_device
      vscope.set_custom_getter(guarantee_const_getter)
      vscope.set_caching_device(lambda op: op.device)

      result = computation(*args, **kwargs)

      vscope.set_custom_getter(prev_custom_getter)
      vscope.set_caching_device(prev_caching_device)
    finally:
      context.Exit()
    return result

  # pylint: disable=undefined-variable
  return rewrite(
      wrapped_computation,
      inputs=inputs,
      infeed_queue=infeed_queue,
      device_assignment=device_assignment,
      name=name)
  # pylint: enable=undefined-variable


def prune_unconnected_ops_from_xla(prune_graph: ops.Graph):
  """Prunes unconnected ops as listed in _UNCONNECTED_OPS_TO_PRUNE.

  Args:
    prune_graph: A TensorFlow graph from which we wish to prune unconnected ops
      as listed in _UNCONNECTED_OPS_TO_PRUNE.  In general, these ops should have
      no inputs and no consumers. These can often be left behind due to graph
      construction rewiring (for instance TF-Hub). While they never execute,
      they will cause XLA compile to fail so we strip them from XLA compile by
      removing the tpu_replicate attribute.
  """
  # Scan over the top-level graph as well as all graphs of registered
  # functions, and strip the replication attribute from ops that have no
  # consumers on any of their outputs.
  for graph in [prune_graph] + [
      f for f in prune_graph._functions.values()  # pylint: disable=protected-access
  ]:
    if not isinstance(graph, ops.Graph):
      continue
    for op in graph.get_operations():
      if op.type not in _UNCONNECTED_OPS_TO_PRUNE:
        continue
      outputs_consumed = False
      for output in op.outputs:
        if output.consumers():
          outputs_consumed = True
          break
      if not outputs_consumed:
        logging.info(
            "Pruning OP %s of type %s from XLA Compile due to "
            "it being disconnected.", op.name, op.type)
        op._clear_attr(tpu_replication._TPU_REPLICATE_ATTR)  # pylint: disable=protected-access