
    2Vh"n                         d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ  ed      	 dd       Z	 	 dd	Zd
 Zd Zd Zd Zd Zd Zd Zd Z	 	 	 	 	 ddZd Zd Zd Zd Zd Z y)    N)
ThreadPool)tree)keras_export)io_utils)
tensorflowzkeras.utils.split_datasetc                    t        |       }|t        d|  dt        |              ||t        d      t	        | |      }|rK|t        j                  dt        d            }t        j                  |       t        j                  |       t        |      }t        |||      \  }}t        |d|       }t        || d       }	t        |||       }t        |	||       }	t        j                  j                   j#                  |      }t        j                  j                   j#                  |	      }	|t        j                  j                   u r:t%        |       r/t'        |       }
|
"|j)                  |
      }|	j)                  |
      }	|j+                  t        j                  j,                        }|	j+                  t        j                  j,                        }	||	fS )a0  Splits a dataset into a left half and a right half (e.g. train / test).

    Args:
        dataset:
            A `tf.data.Dataset`, a `torch.utils.data.Dataset` object,
            or a list/tuple of arrays with the same length.
        left_size: If float (in the range `[0, 1]`), it signifies
            the fraction of the data to pack in the left dataset. If integer, it
            signifies the number of samples to pack in the left dataset. If
            `None`, defaults to the complement to `right_size`.
            Defaults to `None`.
        right_size: If float (in the range `[0, 1]`), it signifies
            the fraction of the data to pack in the right dataset.
            If integer, it signifies the number of samples to pack
            in the right dataset.
            If `None`, defaults to the complement to `left_size`.
            Defaults to `None`.
        shuffle: Boolean, whether to shuffle the data before splitting it.
        seed: A random seed for shuffling.

    Returns:
        A tuple of two `tf.data.Dataset` objects:
        the left and right splits.

    Example:

    >>> data = np.random.random(size=(1000, 4))
    >>> left_ds, right_ds = keras.utils.split_dataset(data, left_size=0.8)
    >>> int(left_ds.cardinality())
    800
    >>> int(right_ds.cardinality())
    200
    NzThe `dataset` argument must be eithera `tf.data.Dataset`, a `torch.utils.data.Dataset`object, or a list/tuple of arrays. Received: dataset=z	 of type zoAt least one of the `left_size` or `right_size` must be specified. Received: left_size=None and right_size=Noner       .A)_get_type_spec	TypeErrortype
ValueError_convert_dataset_to_listrandomrandintintseedshufflelen_rescale_dataset_split_sizeslist_restore_dataset_from_listtfdataDatasetfrom_tensor_slices
is_batchedget_batch_sizebatchprefetchAUTOTUNE)dataset	left_size
right_sizer   r   dataset_type_specdataset_as_listtotal_length
left_splitright_split
batch_sizes              M/home/dcms/DCMS/lib/python3.12/site-packages/keras/src/utils/dataset_utils.pysplit_datasetr+      s   J 'w/ ! ")	4=/C
 	
 i/
 	
 /w8IJO<>>!SX.DD''L8:|Iz ojy12J
{|45K+%wJ -&K 33J?J''//44[AK BGGOO+
70C#G,
!#))*5J%++J7K$$RWW%5%56J&&rww'7'78K{""    c                     t        | |      }g }t        j                         }t        ||||      D ]  }|j                  |        |S )az  Convert `dataset` object to a list of samples.

    Args:
        dataset: A `tf.data.Dataset`, a `torch.utils.data.Dataset` object,
            or a list/tuple of arrays.
        dataset_type_spec: the type of the dataset.
        data_size_warning_flag: If set to `True`, a warning will
            be issued if the dataset takes longer than 10 seconds to iterate.
            Defaults to `True`.
        ensure_shape_similarity: If set to `True`, the shape of
            the first sample will be used to validate the shape of rest of the
            samples. Defaults to `True`.

    Returns:
        List: A list of samples.
    )_get_data_iterator_from_datasettime_get_next_sampleappend)r!   r$   data_size_warning_flagensure_shape_similaritydataset_iteratorr%   
start_timesamples           r*   r   r   k   s_    , 7" OJ"	 ' 	v&' r,   c                 P   |t         u rt        |       dk(  rt        d      d}t        |       D ]  \  }}t	        |t
        j                        st        dt        |       d| d      ||j                  }J|j                  d   |d   k7  s`t        d| d| d	t        j                  |      j                   d
       t        t        |        S |t        u rt        |       dk(  rt        d      d}t        |       D ]  \  }}t	        |t
        j                        st        dt        |       d| d      ||j                  }J|j                  d   |d   k7  s`t        d| d| d	t        j                  |      j                   d       t        t        |        S |t        j                  j                  u r&t!        |       r| j#                         } t        |       S t%        |       rt        |       S |t
        j                  u rt        |       S t        d|       )a  Get the iterator from a dataset.

    Args:
        dataset: A `tf.data.Dataset`, a `torch.utils.data.Dataset` object,
            or a list/tuple of arrays.
        dataset_type_spec: The type of the dataset.

    Returns:
        iterator: An `iterator` object.
    r   zJReceived an empty list dataset. Please provide a non-empty list of arrays.Nz5Expected a list of `numpy.ndarray` objects,Received: 
 at index .zOReceived a list of NumPy arrays with different lengths.Mismatch found at index z, Expected shape=z Received shape=z:.Please provide a list of NumPy arrays of the same length.zJReceived an empty list dataset.Please provide a non-empty tuple of arrays.z6Expected a tuple of `numpy.ndarray` objects,Received: zPReceived a tuple of NumPy arrays with different lengths.Mismatch found at index z;.Please provide a tuple of NumPy arrays of the same length.zInvalid dataset_type_spec: )r   r   r   	enumerate
isinstancenpndarrayr   shapearrayiterziptupler   r   r   r   unbatchis_torch_dataset)r!   r$   expected_shapeielements        r*   r.   r.      se    D w<1= 
 #G, 	JAwgrzz2 !!%gz!A?  %!(q!^A%66 //0c 2&&4%5 6&&(hhw&7&=&=%> ?PP 	" CM""	e	#w<1> 
 #G, 	JAwgrzz2 !!%gz!A?  %!(q!^A%66 //0c 2&&4%5 6&&(hhw&7&=&=%> ?QQ 	" CM""	bggoo	-goo'GG}	'	"G}	bjj	(G}
23D2EF
GGr,   c           
   #     K   ddl m} 	 t        |       } t        |       }t	        |t
        j                  t        j                  f      s ||      r t        j                  |      j                  }nd}d}| t        |       D ]  \  }}|rq|t        j                  |      j                  k7  rOt        dt        j                  |      j                   dt        j                  |      j                   d| d	      |rM|d
z  dk(  rEt        j                         }	t        |	|z
        d
kD  r |rt!        j"                  dt$        d       d}|  y# t        $ r t        d      w xY ww)a  Yield data samples from the `dataset_iterator`.

    Args:
        dataset_iterator: An `iterator` object.
        ensure_shape_similarity: If set to `True`, the shape of
            the first sample will be used to validate the shape of rest of the
            samples. Defaults to `True`.
        data_size_warning_flag: If set to `True`, a warning will
            be issued if the dataset takes longer than 10 seconds to iterate.
            Defaults to `True`.
        start_time (float): the start time of the dataset iteration. this is
            used only if `data_size_warning_flag` is set to true.

    Yields:
        data_sample: The next sample.
    r   )is_torch_tensorNFzReceived an empty dataset. Argument `dataset` must be a non-empty list/tuple of `numpy.ndarray` objects or `tf.data.Dataset` objects.z<All `dataset` samples must have same shape, Expected shape: z Received shape: r8   r9   
   zThe dataset is taking longer than 10 seconds to iterate over. This may be due to the size of the dataset. Keep in mind that the `split_dataset` utility is only for small in-memory dataset (e.g. < 10,000 samples).r+   )categorysource)3keras.src.trainers.data_adapters.data_adapter_utilsrI   r@   nextr;   r   Tensorr<   r=   r?   r>   StopIterationr   r:   r/   r   warningswarnResourceWarning)
r4   r3   r2   r5   rI   first_samplefirst_sample_shaperF   r6   cur_times
             r*   r0   r0      so    ,
 01,-lRYY

$;<A
 "$,!7!=!=!%&+# /0 	6"!RXXf%5%;%;; '')xx'='C'C&D E'')xx'7'='=&>jc  "2v{99;x*,-27MMM3
 "1. .3*1  
,
 	

s   E,A0E :CE,E))E,c                     t        | d      rQ| j                  j                  D ]8  }|j                  dk(  st	        |j
                        j                  d      s8 y y)N	__class__r   ztorch.utils.dataTF)hasattrrX   __mro____name__str
__module__
startswith)r!   parents     r*   rD   rD   $  s]    w$''// 	F)+!!1j+,1- 		
 r,   c                    t        |       }t        |      }| /|t        t        fvr!||t        t        fvrt        d| d|       | |t        t        fvrt        d| d      ||t        t        fvrt        d| d      | dk(  r|dk(  rt	        d      |t        u r
| dk  s| |k\  s|t        u r| dk  s| d	k\  rt	        d
| d|        |t        u r
|dk  s||k\  s|t        u r|dk  s|d	k\  rt	        d| d|       ||cxu rt        u rn n|| z   d	kD  rt	        d      |t        u rt        | |z        } n|t        u rt        |       } |t        u rt        ||z        }n|t        u rt        |      }| ||z
  } n||| z
  }| |z   |kD  rt	        d| d| |z    d|       | df|dffD ]"  \  }}|dk(  st	        d| d|  d| d| d	       t        |       t        |      }} | |fS )a  Rescale the dataset split sizes.

    We want to ensure that the sum of
    the split sizes is equal to the total length of the dataset.

    Args:
        left_size: The size of the left dataset split.
        right_size: The size of the right dataset split.
        total_length: The total length of the dataset.

    Returns:
        tuple: A tuple of rescaled `left_size` and `right_size` integers.
    zjInvalid `left_size` and `right_size` Types. Expected: integer or float or None, Received: type(left_size)=z and type(right_size)=zTInvalid `left_size` Type. Expected: int or float or None, Received: type(left_size)=z.  zUInvalid `right_size` Type. Expected: int or float or None,Received: type(right_size)=r9   r   z]Both `left_size` and `right_size` are zero. At least one of the split sizes must be non-zero.   z=`left_size` should be either a positive integer smaller than z<, or a float within the range `[0, 1]`. Received: left_size=zB`right_size` should be either a positive integer and smaller than z< or a float within the range `[0, 1]`. Received: right_size=z^The sum of `left_size` and `right_size` is greater than 1. It must be less than or equal to 1.zCThe sum of `left_size` and `right_size` should be smaller than the z%. Received: left_size + right_size = zand total_length = leftrightzWith `dataset` of length=z, `left_size`=z and `right_size`=z.Resulting zN side dataset split will be empty. Adjust any of the aforementioned parameters)r   r   floatr   r   round)r"   r#   r&   left_size_typeright_size_typesplitsides          r*   r   r   .  s    )_N:&O 	.e"D?3,#FC4_4EG
 	
 U|!C))7(8=
 	
 /#u"E**9):!=
 	
 A~*/@
 	
 	#!^yL8U"!^yA~(> *>k
 	
 	31_
l :e#1_
a  ,~ .?l
 	
 	>2U2"Q&:
 	

 )l23		3	)$	%:45
	C	:&
 :-			!I-
:,##/. 122;j2H1I!,1
 	
 #F+j'-BC tA:+L>+/
| <!F #>>   	NC
OzIj  r,   c                 z   |t         t        t        j                  j                  fv st        |      r~| d   }| D cg c]  }t        j                  |       } }t        |  D cg c]  }t        j                  |       } }t        j                  ||       } t        j                  d | d      S | S c c}w c c}w )z,Restore the dataset from the list of arrays.r   c                 <    t        | t              rt        |       S | S N)r;   r   rB   xs    r*   <lambda>z,_restore_dataset_from_list.<locals>.<lambda>  s    *Q"5eAh 1 r,   F)top_down)rB   r   r   r   r   rD   r   flattenrA   r<   r?   pack_sequence_astraverse)r%   r$   original_datasetelement_specr6   s        r*   r   r     s     UD"''//::>N? 'q)>MNF4<</NN:=:OP288F+PP//oN}}<
 	
  OPs   B3$B8c                     t        | d      S )z*Check if the `tf.data.Dataset` is batched._batch_size)rY   r!   s    r*   r   r     s    7M**r,   c                 2    t        |       r| j                  S y)z"Get the batch size of the dataset.N)r   rw   rx   s    r*   r   r     s    '"""r,   c                 R   t        | t              rt        S t        | t              rt        S t        | t        j                        rt        j                  S t        | t
        j                  j                        rt
        j                  j                  S t        |       rddl	m} |S y)z!Get the type spec of the dataset.r   )r   N)
r;   rB   r   r<   r=   r   r   r   rD   torch.utils.data)r!   TorchDatasets     r*   r
   r
     sl    '5!	GT	"	GRZZ	(zz	GRWW__	-ww	'	"<r,   c           
      ~    |dk(  rg }t        t        j                  j                  j	                               D ]  }	t        j                  j                  j                  t        j                  j                  j                   |	            sU|	j                  d      rg|	j                  d      r|	dd }	|j                  |	        |Tt        |      j                  t        |            st        d| d|       |}ndg}||t        d	      t        d
      |}t        t        |t        t!        |                        }
t#               }g }g } fd|D        D ]+  }|j                  |j%                  t&        ||
||f             - g }|D ]+  }|j)                         \  }}|j                  |       ||z  }- |dk(  rJd}t+        j,                  t!        |      fd      }|D ]!  }||||t!        |      z    |t!        |      z  }# nm|d}nht!        |      t!        |      k7  r't        dt!        |       dt!        |       d  d      t        t        |            D cg c]  }t/        |       }}|rS|#t1        j2                  dt!        |       d       n.t1        j2                  dt!        |       dt!        |       d       |j5                          |j                          |D cg c],  }t        j                  j                  j                   |      . }}|r|t*        j6                  j9                  d      }t*        j6                  j;                  |      }|j=                  |       |0t*        j6                  j;                  |      }|j=                  |       |||fS c c}w c c}w )a  List all files in `directory`, with their labels.

    Args:
        directory: Directory where the data is located.
            If `labels` is `"inferred"`, it should contain
            subdirectories, each containing files for a class.
            Otherwise, the directory structure is ignored.
        labels: Either `"inferred"`
            (labels are generated from the directory structure),
            `None` (no labels),
            or a list/tuple of integer labels of the same size as the number
            of valid files found in the directory.
            Labels should be sorted according
            to the alphanumeric order of the image file paths
            (obtained via `os.walk(directory)` in Python).
        formats: Allowlist of file extensions to index
            (e.g. `".jpg"`, `".txt"`).
        class_names: Only valid if `labels="inferred"`. This is the explicit
            list of class names (must match names of subdirectories). Used
            to control the order of the classes
            (otherwise alphanumerical order is used).
        shuffle: Whether to shuffle the data. Defaults to `True`.
            If set to `False`, sorts the data in alphanumeric order.
        seed: Optional random seed for shuffling.
        follow_links: Whether to visits subdirectories pointed to by symlinks.
        verbose: Whether the function prints number of files found and classes.
            Defaults to `True`.

    Returns:
        tuple (file_paths, labels, class_names).
        - file_paths: list of file paths (strings).
        - labels: list of matching integer labels (same length as file_paths)
        - class_names: names of the classes corresponding to these labels, in
        order.
    inferredr9   /NzjThe `class_names` passed did not match the names of the subdirectories of the target directory. Expected: z0 (or a subset of it), but received: class_names= zKWhen `labels=None` (no labels), argument `class_names` cannot be specified.zWhen argument `labels` is specified, argument `class_names` cannot be specified (the `class_names` will be the sorted list of labels).c              3   r   K   | ].  }t         j                  j                  j                  |       0 y wrl   )r   iogfilejoin).0subdir	directorys     r*   	<genexpr>z"index_directory.<locals>.<genexpr>9  s&     NFBEEKK$$Y7Ns   47r   int32)dtypezfExpected the lengths of `labels` to match the number of files in the target directory. len(labels) is z while we found z files in directory zFound z files.z files belonging to z	 classes.r	   )sortedr   r   r   listdirisdirr   r^   endswithr1   setissubsetr   dictrA   ranger   r   apply_asyncindex_subdirectorygetr<   zerosr\   r   	print_msgcloser   r   RandomStater   )r   labelsformatsclass_namesr   r   follow_linksverbosesubdirsr   class_indicespoolresults	filenamesdirpathlabels_listrespartial_filenamespartial_labelsrF   labelfname
file_pathsrngs   `                       r*   index_directoryr     s   Z RUU[[00;< 	+Fuu{{  !1!1)V!DE((-s+!'NN6*	+ "{#,,S\: !!(	 *11<?  "G $"~ + 
 !: 
 K[%K0@*ABCM <DGINgN 
"-w?	

 K ',/GGI)>>*&&	'
 3y>+7;) 	%N2@F1q3~../^$$A	% 
 v;#i.(Dv;-/I/? @  ){!-  06c&k/BCes5zCC>I'7w?@Y( )+&'y2 	JJLIIKBKL"%%++""9e4LJL<99$$S)Dii##D)J))''-CKKv{**- D Ms   +N581N:c              #   ,  K   |s*t         j                  j                  j                  |       }nt	        j                  | |      }t        |d       D ]<  \  }}}t        |      D ](  }|j                         j                  |      s#||f * > y w)N)followlinksc                     | d   S )Nr    rm   s    r*   ro   z"iter_valid_files.<locals>.<lambda>v  s
    QqT r,   )key)r   r   r   walkosr   lowerr   )r   r   r   r   root_filesr   s           r*   iter_valid_filesr   q  s~     uu{{	*wwyl; >: "aE] 	"E{{}%%g.Ek!	""s   BB	Bc                    t         j                  j                  |       }t        | ||      }g }g }|D ]  \  }}	|j	                  ||          t
        j                  j                  j                  ||	      }
t
        j                  j                  j                  |t         j                  j                  |
|             }|j	                  |        ||fS )ad  Recursively walks directory and list image paths and their class index.

    Args:
        directory: string, target directory.
        class_indices: dict mapping class names to their index.
        follow_links: boolean, whether to recursively follow subdirectories
            (if False, we only list top-level images in `directory`).
        formats: Allowlist of file extensions to index (e.g. ".jpg", ".txt").

    Returns:
        tuple `(filenames, labels)`. `filenames` is a list of relative file
            paths, and `labels` is a list of integer labels corresponding
            to these files.
    )
r   pathbasenamer   r1   r   r   r   r   relpath)r   r   r   r   dirnamevalid_filesr   r   r   r   absolute_pathrelative_paths               r*   r   r   |  s     ggy)G"9lGDKFI" (emG,-((u5((RWW__]I>
 	'( fr,   c                 0   |s| |fS t        |t        |       z        }|dk(  r7t        j                  dt        |       |z
   d       | d|  } ||d|  }| |fS |dk(  r+t        j                  d| d       | | d } ||| d }| |fS t	        d|       )a  Potentially restrict samples & labels to a training or validation split.

    Args:
        samples: List of elements.
        labels: List of corresponding labels.
        validation_split: Float, fraction of data to reserve for validation.
        subset: Subset of the data to return.
            Either `"training"`, `"validation"`, or `None`.
            If `None`, we return all of the data.

    Returns:
        tuple (samples, labels), potentially restricted to the specified subset.
    trainingzUsing z files for training.N
validationz files for validation.z>`subset` must be either "training" or "validation", received: )r   r   r   r   r   )samplesr   validation_splitsubsetnum_val_sampless        r*    get_training_or_validation_splitr     s     *S\9:OS\O344HI	
 +O+,-o-.F F? 
<	VO#44JKL?*+,_,-.F F?	 **03
 	
r,   c                 &   t         j                  j                  j                  |       }|dk(  r.|j	                  d t         j                  j
                        }|S |dk(  r.|j	                  fdt         j                  j
                        }|S )a  Create a `tf.data.Dataset` from the list/tuple of labels.

    Args:
        labels: list/tuple of labels to be converted into a `tf.data.Dataset`.
        label_mode: String describing the encoding of `labels`. Options are:
        - `"binary"` indicates that the labels (there can be only 2) are encoded
            as `float32` scalars with values 0 or 1
            (e.g. for `binary_crossentropy`).
        - `"categorical"` means that the labels are mapped into a categorical
            vector.  (e.g. for `categorical_crossentropy` loss).
        num_classes: number of classes of labels.

    Returns:
        A `tf.data.Dataset` instance.
    binaryc                 X    t        j                  t        j                  | d      d      S )Nfloat32r   )axis)r   expand_dimscastrm   s    r*   ro   z#labels_to_dataset.<locals>.<lambda>  s    bnnRWWQ	%:D r,   )num_parallel_callscategoricalc                 0    t        j                  |       S rl   )r   one_hot)rn   num_classess    r*   ro   z#labels_to_dataset.<locals>.<lambda>  s    bjjK0 r,   )r   r   r   r   mapr    )r   
label_moder   label_dss     ` r*   labels_to_datasetr     s      ww11&9HX<<D!ww//   
 O 
}	$<<0!ww//   
 Or,   c                     | rd| cxk  rdk  sn t        d|        | s|r| r|st        d      |dvrt        d|       | r|r|t        d      yyy)	a  Raise errors in case of invalid argument values.

    Args:
        validation_split: float between 0 and 1, fraction of data to reserve for
            validation.
        subset: One of `"training"`, `"validation"`, or `"both"`. Only used if
            `validation_split` is set.
        shuffle: Whether to shuffle the data. Either `True` or `False`.
        seed: random seed for shuffling and transformations.
    r   ra   z6`validation_split` must be between 0 and 1, received: zBIf `subset` is set, `validation_split` must be set, and inversely.)r   r   bothNzF`subset` must be either "training", "validation" or "both", received: NzIf using `validation_split` and shuffling the data, you must provide a `seed` argument, to make sure that there is no overlap between the training and validation subset.)r   )r   r   r   r   s       r*   check_validation_split_argr     s     $4 8q 8)*,
 	
 	F-=&P
 	
 ==117:
 	
 GB
 	
 )5Gr,   )NNFN)TT)NTNFT)!r   r   r/   rQ   multiprocessing.poolr   numpyr<   	keras.srcr   keras.src.api_exportr   keras.src.utilsr   keras.src.utils.module_utilsr   r   r+   r   r.   r0   rD   r   r   r   r   r
   r   r   r   r   r   r   r   r,   r*   <module>r      s    	    +   - $ 9 )*BFX# +X#|   	$NHHVDNz!z2+
, 	K+\":#L<
r,   