"""Utilities to help with mesh creation."""

from typing import Dict, List, Optional, Tuple, Union

from absl import logging
import numpy as np

from tensorflow.dtensor.python import accelerator_util
from tensorflow.dtensor.python import api
from tensorflow.dtensor.python import config
from tensorflow.dtensor.python import layout
from tensorflow.dtensor.python import tpu_util
from tensorflow.python.eager import context
from tensorflow.python.framework import device as tf_device
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.util.tf_export import tf_export


def _print_context(num_global_devices: int, num_clients: int, client_id: int,
                   device_type: str, mesh: layout.Mesh) -> None:
  logging.info('This is client %d of %d clients', client_id, num_clients)
  logging.info('Number of global %s devices: %d', device_type.upper(),
               num_global_devices)
  logging.info('Global device IDs: %s', mesh.global_device_ids())
  logging.info('Local device IDs: %s', mesh.local_device_ids())
  logging.info('Local devices: %s', mesh.local_devices())


def _make_device_specs(
    devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None,
    device_type: Optional[str] = None,
) -> Tuple[List[tf_device.DeviceSpec], str]:
  """Makes device specs for all local devices or from a provided list."""
  if devices is None:
    if device_type is None:
      device_type = 'CPU'
    devices = config.local_devices(device_type)
  else:
    if isinstance(devices[0], str):
      devices = [tf_device.DeviceSpec.from_string(d) for d in devices]
    if device_type is None:
      device_type = devices[0].device_type
    if device_type.upper() != devices[0].device_type.upper():
      raise ValueError(
          f'Conflicting devices {str(devices)} and device_type {device_type}')

  return devices, device_type


@tf_export('experimental.dtensor.create_mesh', v1=[])
def create_mesh(
    mesh_dims: Optional[Union[List[Tuple[str, int]], Dict[str, int]]] = None,
    mesh_name: str = '',
    devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None,
    device_type: Optional[str] = None,
    use_xla_spmd: bool = layout.USE_XLA_SPMD,
) -> layout.Mesh:
  """Creates a single-client mesh.

  If both `mesh_dims` and `devices` are specified, they must match each other.
  As a special case, when all arguments are missing, this creates a 1D CPU mesh
  with an empty name, assigning all available devices to that dimension.
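
  Example:

  A minimal sketch of typical usage, assuming eight logical CPU devices are
  available (the dimension names and sizes below are illustrative only):

  ```python

  mesh = dtensor.create_mesh([('x', 4), ('y', 2)])  # a 4x2 mesh over 8 CPUs
  mesh = dtensor.create_mesh({'x': -1})  # a 1D mesh over all local devices
  ```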

  Args:
    mesh_dims: A dict of dim_name: dim_size, or a list of (dim_name, dim_size)
      tuples. Defaults to a single batch-parallel dimension called 'x' using all
      devices. As a special case, a single-element mesh_dims whose dim_size is
      -1 also uses all devices.  e.g. `{'x' : 4, 'y' : 1}` or `[('x', 4), ('y',
      1)]`.
    mesh_name: Name of the created mesh. Defaults to ''.
    devices: String representations of devices to use. This is the device part
      of tf.DeviceSpec, e.g. 'CPU:0'. Defaults to all available logical devices.
    device_type: If `devices` is missing, the type of devices to use. Defaults
      to 'CPU'.
    use_xla_spmd: Boolean when True, will use XLA SPMD instead of DTensor SPMD.

  Returns:
    A single-client mesh created from specified or default arguments.
  """
  device_specs, device_type = _make_device_specs(devices, device_type)

  local_spec = tf_device.DeviceSpec(job=config.job_name(), replica=0, task=0)
  device_specs = [local_spec.make_merged_spec(d) for d in device_specs]

  if isinstance(mesh_dims, dict):
    mesh_dims = list(mesh_dims.items())

  if mesh_dims is None:
    mesh_dims = [('x', len(device_specs))]
  elif len(mesh_dims) == 1 and mesh_dims[0][1] == -1:
    # Replace a single dim_size of -1 with the number of all devices.
    mesh_dims[0] = (mesh_dims[0][0], len(device_specs))

  dim_names = [d[0] for d in mesh_dims]
  shape = [d[1] for d in mesh_dims]

  if np.prod(shape) != len(device_specs):
    raise ValueError(f'length of devices ({len(device_specs)}) must be '
                     f'equal to total size of the mesh of shape {shape}')

  global_device_ids = np.arange(len(device_specs)).reshape(shape)
  local_device_ids = np.ravel(global_device_ids).tolist()

  mesh = layout.Mesh(
      dim_names=dim_names,
      global_device_ids=global_device_ids,
      local_device_ids=local_device_ids,
      local_devices=device_specs,
      mesh_name=mesh_name,
      use_xla_spmd=use_xla_spmd)
  _print_context(
      num_global_devices=len(device_specs),
      num_clients=1,
      client_id=0,
      device_type=device_type,
      mesh=mesh)
  return mesh


@tf_export('experimental.dtensor.create_distributed_mesh', v1=[])
def create_distributed_mesh(
    mesh_dims: Union[List[Tuple[str, int]], Dict[str, int]],
    mesh_name: str = '',
    local_devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None,
    device_type: Optional[str] = None,
    use_xla_spmd: bool = layout.USE_XLA_SPMD,
) -> layout.Mesh:
  """Creates a distributed mesh.

  This is similar to `create_mesh`, but with a different set of arguments to
  create a mesh that spans evenly across a multi-client DTensor cluster.

  For CPU and GPU meshes, users can choose to use fewer local devices than what
  is available by passing an explicit `local_devices` list.

  For TPU, only meshes that use all TPU cores are supported by the DTensor
  runtime.
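
  Example:

  A hypothetical sketch for a cluster of 2 clients with 4 GPUs each, assuming
  `dtensor.initialize_accelerator_system()` has already run on every client:

  ```python

  # Runs on every client; each contributes its 4 local GPUs to one global
  # 2x4 mesh over all 8 devices.
  mesh = dtensor.create_distributed_mesh(
      [('batch', 2), ('model', 4)], device_type='GPU')
  ```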

  Args:
    mesh_dims: A dict of dim_name: dim_size, or a list of (dim_name, dim_size)
      tuples. e.g. `{'x' : 4, 'y' : 1}` or `[('x', 4), ('y', 1)]`.
    mesh_name: Name of the created mesh. Defaults to ''.
    local_devices: String representations of devices to use. This is the device
      part of tf.DeviceSpec, e.g. 'CPU:0'. Defaults to all available local
      logical devices.
    device_type: Type of device to build the mesh for. Defaults to 'CPU'.
      Supported values are 'CPU', 'GPU', 'TPU'.
    use_xla_spmd: Boolean when True, will use XLA SPMD instead of DTensor SPMD.

  Returns:
    A mesh that spans evenly across all DTensor clients in the cluster.
  """
  if isinstance(mesh_dims, dict):
    mesh_dims = list(mesh_dims.items())
  dim_names, shape = zip(*mesh_dims)

  if not accelerator_util.is_initialized():
    raise ValueError('Accelerators are uninitialized, please run '
                     'dtensor.initialize_accelerator_system() first.')

  if device_type and device_type.upper() == 'TPU':
    # TPU meshes can only be configured through environment variables that
    # reflect the actual TPU topology, so a custom device list is rejected.
    if local_devices is not None:
      raise ValueError(
          f'Do not specify devices for {device_type.upper()} meshes. '
          f'Using a partial list of devices for {device_type.upper()} '
          f'is not supported.')

  device_specs, device_type = _make_device_specs(local_devices, device_type)

  if device_type.upper() in ['CPU', 'GPU']:
    # For CPU and GPU meshes, user-specified args take precedence over env
    # vars. This is particularly useful on single clients when users want to
    # create meshes that use fewer logical devices than what is available.
    local_spec = tf_device.DeviceSpec(
        job=config.job_name(), replica=0, task=config.client_id())
    device_specs = [local_spec.make_merged_spec(d) for d in device_specs]

    # Assumes an identical number of local devices on every client.
    num_global_devices = len(device_specs) * config.num_clients()

    if np.prod(shape) != num_global_devices:
      raise ValueError(
          f'Global number of devices ({len(device_specs)} per client * '
          f'{config.num_clients()} clients = {num_global_devices}) must be '
          f'equal to total size of the mesh of shape {shape}')

    global_device_ids = np.arange(num_global_devices).reshape(shape)
    flattened = np.ravel(global_device_ids).tolist()
    start_idx = len(device_specs) * config.client_id()
    local_device_ids = flattened[start_idx:start_idx + len(device_specs)]

    mesh = layout.Mesh(
        dim_names=dim_names,
        global_device_ids=global_device_ids,
        local_device_ids=local_device_ids,
        local_devices=device_specs,
        mesh_name=mesh_name,
        use_xla_spmd=use_xla_spmd)
    _print_context(num_global_devices, config.num_clients(),
                   config.client_id(), device_type, mesh)
    return mesh

  if device_type.upper() == 'TPU':
    mesh = tpu_util.create_tpu_mesh(
        mesh_dim_names=dim_names,
        mesh_shape=shape,
        mesh_name=mesh_name,
        use_xla_spmd=use_xla_spmd)
    _print_context(
        config.num_global_devices(device_type), config.num_clients(),
        config.client_id(), device_type, mesh)
    return mesh

  raise ValueError(f'Device type {device_type} is not CPU, GPU or TPU')


_BARRIER_DICT = {}


@tf_export('experimental.dtensor.barrier', v1=[])
def barrier(mesh: layout.Mesh,
            barrier_name: Optional[str] = None,
            timeout_in_ms: Optional[int] = None):
  """Runs a barrier on the mesh.

  Upon returning from the barrier, all operations run before the barrier
  would have completed across all clients. Currently we allocate a fully
  sharded tensor with mesh shape and run an all_reduce on it.

  Example:

  A barrier can be used before application exit to ensure completion of pending
  ops.

  ```python

  x = [1, 2, 3]
  x = dtensor.relayout(x, dtensor.Layout.batch_sharded(mesh, 'batch', 1))
  dtensor.barrier(mesh)

  # At this point all devices on all clients in the mesh have completed
  # operations before the barrier. Therefore it is OK to tear down the clients.
  sys.exit()
  ```
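
  A variant of the same call with an explicit name and timeout; both arguments
  are optional and the values here are illustrative:

  ```python

  dtensor.barrier(mesh, barrier_name='before-exit', timeout_in_ms=60000)
  ```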

  Args:
    mesh: The mesh to run the barrier on.
    barrier_name: The name of the barrier. Mainly used for logging purposes.
    timeout_in_ms: The timeout of the barrier in ms. If omitted, blocks
      indefinitely till the barrier is reached from all clients.
  """
  if barrier_name is None:
    barrier_name = '(barrier)'

  logging.info('entering barrier before op: %s', barrier_name)

  # Make sure all outstanding ops are consumed before running the sync.
  context.async_wait()

  # A reduction on a fully sharded tensor requires all devices to participate,
  # which serves as a barrier across the mesh.
  component = array_ops.reshape(1.0, [1] * len(mesh.shape()))
  ones = api.pack([component] * mesh.num_local_devices(),
                  layout.Layout(mesh.dim_names, mesh))

  mesh_size = math_ops.reduce_sum(ones)
  if mesh_size != mesh.size:
    raise ValueError(
        'Global barrier produced wrong mesh size : {0} while mesh has actual '
        'size : {1}'.format(mesh_size, mesh.size))

  # Wait again so that the all_reduce result is consumed before returning.
  context.async_wait()

  if context.context().coordination_service:
    if timeout_in_ms is None:
      # Block effectively indefinitely (24 hours).
      timeout_in_ms = 24 * 60 * 60 * 1000

    # Generate a unique barrier id for every invocation of the same barrier.
    num_calls = _BARRIER_DICT.setdefault(barrier_name, 0)
    _BARRIER_DICT[barrier_name] = num_calls + 1

    barrier_id = f'{barrier_name}:{num_calls}'
    context.context().wait_at_barrier(barrier_id, timeout_in_ms)

  logging.info('finished running barrier across all clients after op: %s',
               barrier_name)