
    BVh-                     J   d Z ddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddl
mZ ddl
mZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ  G d dej.                        Z ej2                  dd       ed      dd              Zd Zd Zddej:                  fdZy)zExperimental shuffle ops.    N)random_access)dataset_ops)random_seed)constant_op)dtypes)ops)	array_ops)gen_dataset_ops)math_ops)stateless_random_ops)deprecation)	tf_exportc                   $     e Zd ZdZd fd	Z xZS )_ShuffleAndRepeatDatasetz.A `Dataset` that fuses `shuffle` and `repeat`.c                 J   || _         t        j                  |t        j                  d      | _        |,t        j                  dt        j                  d      | _        n+t        j                  |t        j                  d      | _        t        j                  |      \  | _        | _        t        j                  | j                   j                  f| j
                  | j                  | j                  | j                  d| j                   }t"        t$        | O  ||       y )Nbuffer_size)dtypenamecount)r   r   seedseed2)_input_datasetr   convert_to_tensorr   int64_buffer_sizer   constant_countr   get_seed_seed_seed2r
   shuffle_and_repeat_dataset_variant_tensor_flat_structuresuperr   __init__)selfinput_datasetr   r   r   variant_tensor	__class__s         c/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/data/experimental/ops/shuffle_ops.pyr&   z!_ShuffleAndRepeatDataset.__init__%   s    'D--6<<m=D}((6<<gNdk))
v||'3dk)2248DJ$??++ %%kkZZkk  

 N 

"D2=3AC    NN)__name__
__module____qualname____doc__r&   __classcell__)r*   s   @r+   r   r   "   s    6C Cr,   r   zUse `tf.data.Dataset.shuffle(buffer_size, seed)` followed by `tf.data.Dataset.repeat(count)`. Static tf.data optimizations will take care of using the fused implementation.z$data.experimental.shuffle_and_repeatc                       fd}|S )a  Shuffles and repeats a Dataset, reshuffling with each repetition.

  >>> d = tf.data.Dataset.from_tensor_slices([1, 2, 3])
  >>> d = d.apply(tf.data.experimental.shuffle_and_repeat(2, count=2))
  >>> [elem.numpy() for elem in d] # doctest: +SKIP
  [2, 3, 1, 1, 3, 2]

  ```python
  dataset.apply(
    tf.data.experimental.shuffle_and_repeat(buffer_size, count, seed))
  ```

  produces the same output as

  ```python
  dataset.shuffle(
    buffer_size, seed=seed, reshuffle_each_iteration=True).repeat(count)
  ```

  In each repetition, this dataset fills a buffer with `buffer_size` elements,
  then randomly samples elements from this buffer, replacing the selected
  elements with new elements. For perfect shuffling, set the buffer size equal
  to the full size of the dataset.

  For instance, if your dataset contains 10,000 elements but `buffer_size` is
  set to 1,000, then `shuffle` will initially select a random element from
  only the first 1,000 elements in the buffer. Once an element is selected,
  its space in the buffer is replaced by the next (i.e. 1,001-st) element,
  maintaining the 1,000 element buffer.

  Args:
    buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the maximum
      number elements that will be buffered when prefetching.
    count: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the number
      of times the dataset should be repeated. The default behavior (if `count`
      is `None` or `-1`) is for the dataset be repeated indefinitely.
    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the random
      seed that will be used to create the distribution. See
      `tf.random.set_seed` for behavior.

  Returns:
    A `Dataset` transformation function, which can be passed to
    `tf.data.Dataset.apply`.
  c                      t        |       S )N)r   )datasetr   r   r   s    r+   	_apply_fnz%shuffle_and_repeat.<locals>._apply_fnm   s    #G[%FFr,    )r   r   r   r6   s   ``` r+   shuffle_and_repeatr8   :   s    fG 
r,   c                    g }d}t        j                  g       }d}t        j                  g       }d}d}| D ]   }|j                  |d          d}	d|v rE|d   dk  rt        dj	                  |d               |d   dk(  r|d   }	nt        |d   |d         }	|d   |	z
  }
d|v r<|d   dk  rt        d	j	                  |d               |d   dk7  rt        |d   |
      }
|d   |	z
  |
z
  }|
|d   k7  rd
}||
z  }t        j                  |||	z         }||	|z   z  }t        j                  ||      }||
z  } ||d}|r
||d<   ||d<   |S )aP  Computes aggregate information about files to read.

  The method collects information about the files to read, the total number of
  elements, and arrays that can be used to account for elements to be skipped,
  which can be specified via the "skip" and "take" keys.

  To account for elements to skip, the range of each file can be divided into
  three regions:
  - S (elements to skip)
  - T (elements to read)
  - R (remainder of elements that will also be skipped)

  The `thresholds` and `offsets` arrays are initialized as follows:
  `thresholds = [0, T_1, T_1 + T_2, ...]` and
  `offsets = [S_1, S_1 + R_1 + S_2, S_1 + R_1 + S_2 + R_2 + S_3, ...]`

  This makes it possible to map an index from a contiguous range
  `(0...num_elements_to_read)` to an index in the range of all elements,
  skipping over elements as per the "skip" and "take" keys values. In
  particular, for a given input index `X`, we find the greatest `thresholds`
  value that is smaller or equal to `X`. Let `t(X)` denotes such index in the
  `thresholds` array. The output index is computed as `X + offsets[t(X)]`.

  Args:
    file_infos: See `file_infos` argument of `index_shuffle` for details.

  Returns:
    A dictionary containing the following keys:
      - `files`, the vector of pathnames of files to read
      - `num_elements`, an integer identifying the total number of elements
      - `offsets`, the vector of offsets to use for index adjustment (in case
        any elements should be skipped)
      - `thresholds`, the vector of thresholds to use for index adjustment (in
        case any elements should be skipped)
  r   Fpathskipr   z-`skip` should be greater than `-1` but got {}num_elementstakez-`take` should be greater than `-1` but got {}T)filesr<   offsets
thresholds)npr   append
ValueErrorformatmin)
file_infosr>   r<   r?   
offset_sumr@   threshold_sumadjustment_needed	file_infor;   r=   	remainderresults                r+   _process_file_infosrM   s   s   H %,HHRL'*xx|*- i	LL6"#D	6	R	HOOf   	 	6	b	 (9V$i&?@^$t+D	6	R	HOOf   	  
6	b	 9V$d+.)D047Iy((DLiid!23G$""J:}5JTM56 L9&F9%F<	-r,   c           	          t        j                  t        j                  |t        j                  ||                   d   dz
  }| t        j
                  ||      z   S )z4Adjusts index to account for elements to be skipped.r      )r	   shapeboolean_maskr   
less_equalgather)indexr@   r?   t_indexs       r+   _adjust_indexrV      s_    OO



j%
023 456 9::' 
!!'73	33r,   Fc                     t        |       fd}t        j                  j                  ||      }|j	                  d      j                  dd      }|j                  |      S )a(  Creates a (globally) shuffled dataset from the given set of files.

  Unlike `tf.data.Dataset.shuffle()`, which uses an in-memory buffer to shuffle
  elements of input dataset in a streaming fashion,
  `tf.data.experimental.index_shuffle()` performs a global shuffle of element
  indices and then reads the data in a shuffled order. The advantage of
  `index_shuffle()` is that it can perform global shuffle of datasets that do
  not fit into memory (as long as the array of their indices does) and that the
  shuffling logic it provides is compatible with symbolic checkpointing. The
  disadvantage of `index_shuffle()` is that reading data in a shuffled random
  order will in general not be as efficient as reading data sequentially.

  Args:
    file_infos: A list of dictionaries that describe each file of the input
      dataset. Each dictionary is expected to contain the "path" key, which
      identifies the path of the file and the "num_elements" key, which
      identifies the number of elements in the file. In addition, the "skip"
      and "take" keys can be used to identify the number of elements to skip
      and take respectively. By default, no elements are skipped and all
      elements are taken.
    reader_factory: A function that maps a sequence of filenames to an instance
      of `tf.data.Dataset` that reads data from the files.
    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the random
      seed that will be used to shuffle the order of elements. Default to
      non-deterministic seed.
    reshuffle_each_iteration: (Optional.) A `tf.bool` scalar `tf.Tensor`, that
      determines whether to change the shuffle order each iteration. Defaults to
      `False`.
    num_parallel_calls: (Optional.) A `tf.int64` scalar `tf.Tensor`, that
      determines the maximum number of random access operations to perform
      in parallel. By default, the tf.data runtime uses autotuning to determine
      the value dynamically.

  Returns:
    A `tf.data.Dataset` object, representing a globally shuffled dataset of
    the input data.
  c                      t         j                  j                  d         } fd}t        j                  | d               }|j                  |      S )Nr<   c                     t        j                  |d   dz
        }dv rdv rt        |d   d         }t        j                  | |      S )Nr<   rO   r@   r?   )r   index_shufflerV   r   at)r5   rT   shuffled_indexrL   seedss      r+   read_elementzEindex_shuffle.<locals>.sequential_index_shuffle.<locals>.read_element   se    +99
~.24n 
	I$7&~vl7K'-i'8: g~66r,   r>   )num_parallel_calls)r   Datasetrange	functoolspartialmap)r]   r5   r^   map_funcr_   reader_factoryrL   s   `   r+   sequential_index_shufflez/index_shuffle.<locals>.sequential_index_shuffle   sT    !!''~(>?G	7   ~fWo/NOH;;x4F;GGr,   )r   rerandomize_each_iteration   T)drop_remainder)rM   r   r`   randomr=   batchflat_map)rF   rf   r   reshuffle_each_iterationr_   rg   rng_dsrL   s    `  `  @r+   rZ   rZ      sh    V z*&H& %%!9 & ;& ;;q>$7&	1	22r,   r-   )r1   rb   numpyrA   'tensorflow.python.data.experimental.opsr   tensorflow.python.data.opsr   tensorflow.python.data.utilr   tensorflow.python.frameworkr   r   r   tensorflow.python.opsr	   r
   r   r   tensorflow.python.utilr    tensorflow.python.util.tf_exportr   UnaryUnchangedStructureDatasetr   
deprecatedr8   rM   rV   AUTOTUNErZ   r7   r,   r+   <module>r{      s        A 2 3 3 . + + 1 * 6 . 6C{II C0  ./ 121 3	/
1hJZ4 +0%0%9%9	D3r,   