
    VhL                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZmZmZmZ g dZ ed      Z edd	
      Zeeef   Zeedf   Z edee      Z G d dee         Z G d dee   e	e         Z G d deeedf            Z G d dee         Z  G d dee         Z! G d de      Z" G d dee         Z#efdee   deee$e%f      de
e   de&e#e      fdZ'y)     N)Sequence)castGenericIterableOptionalTypeVarUnion)
deprecated)default_generator	GeneratorrandpermTensor)DatasetIterableDatasetTensorDatasetStackDatasetConcatDatasetChainDatasetSubsetrandom_split_T_T_coT)	covariant._T_stackc                   $    e Zd ZdZdefdZddZy)r   a  An abstract class representing a :class:`Dataset`.

    All datasets that represent a map from keys to data samples should subclass
    it. All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key. Subclasses could also optionally overwrite
    :meth:`__len__`, which is expected to return the size of the dataset by many
    :class:`~torch.utils.data.Sampler` implementations and the default options
    of :class:`~torch.utils.data.DataLoader`. Subclasses could also
    optionally implement :meth:`__getitems__`, for speedup batched samples
    loading. This method accepts list of indices of samples of batch and returns
    list of samples.

    .. note::
      :class:`~torch.utils.data.DataLoader` by default constructs an index
      sampler that yields integral indices.  To make it work with a map-style
      dataset with non-integral indices/keys, a custom sampler must be provided.
    returnc                     t        d      )Nz3Subclasses of Dataset should implement __getitem__.)NotImplementedErrorselfindexs     H/home/dcms/DCMS/lib/python3.12/site-packages/torch/utils/data/dataset.py__getitem__zDataset.__getitem__:   s    !"WXX    c                     t        | |g      S N)r   r    others     r"   __add__zDataset.__add__A   s    dE]++r$   N)r(   zDataset[_T_co]r   zConcatDataset[_T_co])__name__
__module____qualname____doc__r   r#   r)    r$   r"   r   r   '   s    $YE Y,r$   r   c                   "    e Zd ZdZdee   fdZy)r   aI  An iterable Dataset.

    All datasets that represent an iterable of data samples should subclass it.
    Such form of datasets is particularly useful when data come from a stream.

    All subclasses should overwrite :meth:`__iter__`, which would return an
    iterator of samples in this dataset.

    When a subclass is used with :class:`~torch.utils.data.DataLoader`, each
    item in the dataset will be yielded from the :class:`~torch.utils.data.DataLoader`
    iterator. When :attr:`num_workers > 0`, each worker process will have a
    different copy of the dataset object, so it is often desired to configure
    each copy independently to avoid having duplicate data returned from the
    workers. :func:`~torch.utils.data.get_worker_info`, when called in a worker
    process, returns information about the worker. It can be used in either the
    dataset's :meth:`__iter__` method or the :class:`~torch.utils.data.DataLoader` 's
    :attr:`worker_init_fn` option to modify each copy's behavior.

    Example 1: splitting workload across all workers in :meth:`__iter__`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> # xdoctest: +SKIP("Fails on MacOS12")
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super(MyIterableDataset).__init__()
        ...         assert end > start, "this example code only works with end >= start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         worker_info = torch.utils.data.get_worker_info()
        ...         if worker_info is None:  # single-process data loading, return the full iterator
        ...             iter_start = self.start
        ...             iter_end = self.end
        ...         else:  # in a worker process
        ...             # split workload
        ...             per_worker = int(math.ceil((self.end - self.start) / float(worker_info.num_workers)))
        ...             worker_id = worker_info.id
        ...             iter_start = self.start + worker_id * per_worker
        ...             iter_end = min(iter_start + per_worker, self.end)
        ...         return iter(range(iter_start, iter_end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [tensor([3]), tensor([4]), tensor([5]), tensor([6])]

        >>> # xdoctest: +REQUIRES(POSIX)
        >>> # Multi-process loading with two worker processes
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

        >>> # With even more workers
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

    Example 2: splitting workload across all workers using :attr:`worker_init_fn`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super(MyIterableDataset).__init__()
        ...         assert end > start, "this example code only works with end >= start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         return iter(range(self.start, self.end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [3, 4, 5, 6]
        >>>
        >>> # Directly doing multi-process loading yields duplicate data
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [3, 3, 4, 4, 5, 5, 6, 6]

        >>> # Define a `worker_init_fn` that configures each dataset copy differently
        >>> def worker_init_fn(worker_id):
        ...     worker_info = torch.utils.data.get_worker_info()
        ...     dataset = worker_info.dataset  # the dataset copy in this worker process
        ...     overall_start = dataset.start
        ...     overall_end = dataset.end
        ...     # configure the dataset to only process the split workload
        ...     per_worker = int(math.ceil((overall_end - overall_start) / float(worker_info.num_workers)))
        ...     worker_id = worker_info.id
        ...     dataset.start = overall_start + worker_id * per_worker
        ...     dataset.end = min(dataset.start + per_worker, overall_end)
        ...

        >>> # Mult-process loading with the custom `worker_init_fn`
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2, worker_init_fn=worker_init_fn)))
        [3, 5, 4, 6]

        >>> # With even more workers
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12, worker_init_fn=worker_init_fn)))
        [3, 4, 5, 6]
    r(   c                     t        | |g      S r&   )r   r'   s     r"   r)   zIterableDataset.__add__   s    T5M**r$   N)r*   r+   r,   r-   r   r   r)   r.   r$   r"   r   r   I   s    jX+WU^ +r$   r   c                   B    e Zd ZU dZeedf   ed<   deddfdZd Zd Z	y)	r   zDataset wrapping tensors.

    Each sample will be retrieved by indexing tensors along the first dimension.

    Args:
        *tensors (Tensor): tensors that have the same size of the first dimension.
    .tensorsr   Nc                 J    t        fdD              sJ d       | _        y )Nc              3   j   K   | ]*  }d    j                  d       |j                  d       k(   , yw)r   N)size).0tensorr2   s     r"   	<genexpr>z)TensorDataset.__init__.<locals>.<genexpr>   s0      
5;GAJOOA&++a.0
s   03zSize mismatch between tensors)allr2   )r    r2   s    `r"   __init__zTensorDataset.__init__   s3     
?F
 
 	+*	+ 
 r$   c                 @    t        fd| j                  D              S )Nc              3   (   K   | ]	  }|     y wr&   r.   )r6   r7   r!   s     r"   r8   z,TensorDataset.__getitem__.<locals>.<genexpr>   s     >vVE]>   )tupler2   r   s    `r"   r#   zTensorDataset.__getitem__   s    >>>>r$   c                 >    | j                   d   j                  d      S Nr   )r2   r5   r    s    r"   __len__zTensorDataset.__len__   s    ||A##A&&r$   )
r*   r+   r,   r-   r>   r   __annotations__r:   r#   rB   r.   r$   r"   r   r      s5     63; D ?'r$   r   c                   ^    e Zd ZU dZeeef   ed<   dee	   dee	   ddfdZ
d Zd	efd
Zd Zy)r   a  Dataset as a stacking of multiple datasets.

    This class is useful to assemble different parts of complex input data, given as datasets.

    Example:
        >>> # xdoctest: +SKIP
        >>> images = ImageDataset()
        >>> texts = TextDataset()
        >>> tuple_stack = StackDataset(images, texts)
        >>> tuple_stack[0] == (images[0], texts[0])
        >>> dict_stack = StackDataset(image=images, text=texts)
        >>> dict_stack[0] == {'image': images[0], 'text': texts[0]}

    Args:
        *args (Dataset): Datasets for stacking returned as tuple.
        **kwargs (Dataset): Datasets for stacking returned as dict.
    datasetsargskwargsr   Nc                 V    |rG|rt        d      t        |d          _        t         fd|D              rt        d      | _        y |rSt        |j                               }t        |d          _        t         fd|D              rt        d      | _        y t        d      )NztSupported either ``tuple``- (via ``args``) or``dict``- (via ``kwargs``) like input/output, but both types are given.r   c              3   N   K   | ]  }j                   t        |      k7    y wr&   _lengthlenr6   datasetr    s     r"   r8   z(StackDataset.__init__.<locals>.<genexpr>   s     DG4<<3w</D   "%zSize mismatch between datasetsc              3   N   K   | ]  }j                   t        |      k7    y wr&   rJ   rM   s     r"   r8   z(StackDataset.__init__.<locals>.<genexpr>   s     CG4<<3w</CrO   z%At least one dataset should be passed)
ValueErrorrL   rK   anyrE   listvalues)r    rF   rG   tmps   `   r"   r:   zStackDataset.__init__   s     ^  tAw<DLDtDD !ABB DMv}}'Cs1v;DLCsCC !ABB"DMDEEr$   c                     t        | j                  t              r1| j                  j                         D ci c]  \  }}||    c}}S t	        fd| j                  D              S c c}}w )Nc              3   (   K   | ]	  }|     y wr&   r.   )r6   rN   r!   s     r"   r8   z+StackDataset.__getitem__.<locals>.<genexpr>  s     AWU^Ar=   )
isinstancerE   dictitemsr>   )r    r!   krN   s    `  r"   r#   zStackDataset.__getitem__   sW    dmmT*8<8K8K8MN*!WAwu~%NNA4==AAA Os   A+indicesc           	         t        | j                  t              r|D cg c]  }i  }}| j                  j                         D ]  \  }}t	        t        |dd             re|j                  |      }t        |      t        |      k7  r#t        dt        |       dt        |             t        ||      D ]
  \  }}|||<    t        ||      D ]  \  }	}||	   ||<     |S |D cg c]  }g  }
}| j                  D ]  }t	        t        |dd             rq|j                  |      }t        |      t        |      k7  r#t        dt        |       dt        |             t        ||
      D ]  \  }}|j                  |        t        ||
      D ]  \  }	}|j                  ||	            |
D cg c]  }t        |       }}|S c c}w c c}w c c}w )N__getitems__z0Nested dataset's output size mismatch. Expected z, got )rX   rE   rY   rZ   callablegetattrr^   rL   rQ   zipappendr>   )r    r\   _
dict_batchr[   rN   rZ   datad_sampleidx
list_batcht_samplesampletuple_batchs                 r"   r^   zStackDataset.__getitems__  s   dmmT*5<(=(=J(="mm113 3
7GG^TBC#009E5zS\1()),WfSZLJ  +.eZ*@ +h&*+ *-Wj)A 3X&-cl33  /6!6"!6
!6}} 	2G>?,,W5u:W-$%%(\N&UF  '*%&< *ND(OOD)* &)*%= 2MCOOGCL12	2 DN&NuV}&N&NA )>" "7 'Os   	G)	G8Gc                     | j                   S r&   )rK   rA   s    r"   rB   zStackDataset.__len__(  s    ||r$   )r*   r+   r,   r-   r	   r>   rY   rC   r   r   r:   r#   rS   r^   rB   r.   r$   r"   r   r      sV    $ E4K  Fgen F F4 F(B
#D #Jr$   r   c                        e Zd ZU dZeee      ed<   ee   ed<   e	d        Z
dee   ddf fdZd Zd	 Ze ed
e      d               Z xZS )r   zDataset as a concatenation of multiple datasets.

    This class is useful to assemble different existing datasets.

    Args:
        datasets (sequence): List of datasets to be concatenated
    rE   cumulative_sizesc                 d    g d}}| D ]&  }t        |      }|j                  ||z          ||z  }( |S r@   )rL   rb   )sequencersels        r"   cumsumzConcatDataset.cumsum8  sB    11 	AAAHHQUOFA	 r$   r   Nc                    t         |           t        |      | _        t	        | j                        dkD  sJ d       | j                  D ]  }t        |t              sJ d        | j                  | j                        | _        y )Nr   z(datasets should not be an empty iterablez.ConcatDataset does not support IterableDataset)	superr:   rS   rE   rL   rX   r   ru   rn   )r    rE   d	__class__s      r"   r:   zConcatDataset.__init__A  s    X4==!A%Q'QQ% 	@A!? @?@ 	@ !%DMM :r$   c                      | j                   d   S )Nrn   rA   s    r"   rB   zConcatDataset.__len__K  s    $$R((r$   c                     |dk  r(| t        |       kD  rt        d      t        |       |z   }t        j                  | j                  |      }|dk(  r|}n|| j                  |dz
     z
  }| j
                  |   |   S )Nr   z8absolute value of index should not exceed dataset length   )rL   rQ   bisectbisect_rightrn   rE   )r    rg   dataset_idx
sample_idxs       r"   r#   zConcatDataset.__getitem__N  s    7tc$i N  d)c/C))$*?*?E!Jt44[1_EEJ}}[)*55r$   z>`cummulative_sizes` attribute is renamed to `cumulative_sizes`)categoryc                     | j                   S r&   r|   rA   s    r"   cummulative_sizeszConcatDataset.cummulative_sizes\  s     $$$r$   )r*   r+   r,   r-   rS   r   r   rC   intstaticmethodru   r   r:   rB   r#   propertyr
   FutureWarningr   __classcell__ry   s   @r"   r   r   ,  s~     75>""3i ;'!2 ;t ;)6 H%	 
%r$   r   c                   >     e Zd ZdZdee   ddf fdZd Zd Z xZ	S )r   a_  Dataset for chaining multiple :class:`IterableDataset` s.

    This class is useful to assemble different existing dataset streams. The
    chaining operation is done on-the-fly, so concatenating large-scale
    datasets with this class will be efficient.

    Args:
        datasets (iterable of IterableDataset): datasets to be chained together
    rE   r   Nc                 0    t         |           || _        y r&   )rw   r:   rE   )r    rE   ry   s     r"   r:   zChainDataset.__init__p  s     r$   c              #   t   K   | j                   D ]#  }t        |t              sJ d       |E d {    % y 7 w)N*ChainDataset only supports IterableDataset)rE   rX   r   )r    rx   s     r"   __iter__zChainDataset.__iter__t  sF      	A? <;<  LL		 s   ,868c                 v    d}| j                   D ]'  }t        |t              sJ d       |t        |      z  }) |S )Nr   r   )rE   rX   r   rL   )r    totalrx   s      r"   rB   zChainDataset.__len__{  sO     	A? <;<  SVOE		
 r$   )
r*   r+   r,   r-   r   r   r:   r   rB   r   r   s   @r"   r   r   e  s*    !'!2 !t !r$   r   c                   z    e Zd ZU dZee   ed<   ee   ed<   dee   dee   ddfdZ	d Z
dee   dee   fdZd	 Zy)
r   z
    Subset of a dataset at specified indices.

    Args:
        dataset (Dataset): The whole Dataset
        indices (sequence): Indices in the whole set selected for subset
    rN   r\   r   Nc                      || _         || _        y r&   )rN   r\   )r    rN   r\   s      r"   r:   zSubset.__init__  s    r$   c                     t        |t              r*| j                  |D cg c]  }| j                  |    c}   S | j                  | j                  |      S c c}w r&   )rX   rS   rN   r\   )r    rg   is      r"   r#   zSubset.__getitem__  sK    c4 <<# >Qa >??||DLL-.. !?s   Ac                    t        t        | j                  dd             r6| j                  j                  |D cg c]  }| j                  |    c}      S |D cg c]  }| j                  | j                  |        c}S c c}w c c}w )Nr^   )r_   r`   rN   r^   r\   )r    r\   rg   s      r"   r^   zSubset.__getitems__  sn     GDLL.$?@<<,,7-SCdll3.?-STT?FGDLLc!23GG .TGs   B#Bc                 ,    t        | j                        S r&   )rL   r\   rA   s    r"   rB   zSubset.__len__  s    4<<  r$   )r*   r+   r,   r-   r   r   rC   r   r   r:   r#   rS   r^   rB   r.   r$   r"   r   r     sg     U^c] # 4 /
HDI H$u+ H!r$   r   rN   lengths	generatorr   c           
      d   t        j                  t        |      d      rt        |      dk  rg }t        |      D ]Y  \  }}|dk  s|dkD  rt	        d| d      t        t        j                  t        |       |z              }|j                  |       [ t        |       t        |      z
  }t        |      D ]  }|t        |      z  }||xx   dz  cc<    |}t        |      D ]$  \  }}	|	dk(  st        j                  d| d       & t        |      t        |       k7  rt	        d      t        t        |      |      j                         }
t        t        t
           |      }t!        t#        j$                  |      |      D 	cg c]  \  }}	t'        | |
||	z
  |        c}	}S c c}	}w )	a  
    Randomly split a dataset into non-overlapping new datasets of given lengths.

    If a list of fractions that sum up to 1 is given,
    the lengths will be computed automatically as
    floor(frac * len(dataset)) for each fraction provided.

    After computing the lengths, if there are any remainders, 1 count will be
    distributed in round-robin fashion to the lengths
    until there are no remainders left.

    Optionally fix the generator for reproducible results, e.g.:

    Example:
        >>> # xdoctest: +SKIP
        >>> generator1 = torch.Generator().manual_seed(42)
        >>> generator2 = torch.Generator().manual_seed(42)
        >>> random_split(range(10), [3, 7], generator=generator1)
        >>> random_split(range(30), [0.3, 0.3, 0.4], generator=generator2)

    Args:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths or fractions of splits to be produced
        generator (Generator): Generator used for the random permutation.
    r~   r   zFraction at index z is not between 0 and 1zLength of split at index z- is 0. This might result in an empty dataset.zDSum of input lengths does not equal the length of the input dataset!)r   )mathisclosesum	enumeraterQ   r   floorrL   rb   rangewarningswarnr   tolistr   r   ra   	itertools
accumulater   )rN   r   r   subset_lengthsr   fracn_items_in_split	remainderidx_to_add_atlengthr\   offsets               r"   r   r     s   < ||CL!$W):$& ) 	4GAtax4!8 #5aS8O!PQQ"

3w<$./  !!"23	4 L3~#66	y! 	/AN 33M=)Q.)	/ !"7+ 	IAv{/s 3= >	 7|s7|#R
 	
 s7|y9@@BG8C='*G ")"6"6w"?IFF 	w&9:  s   F,)(r   r   r   r   collections.abcr   typingr   r   r   r   r   r	   typing_extensionsr
   torchr   r   r   r   __all__r   r   rY   str_T_dictr>   _T_tupler   r   r   r   r   r   r   r   r   floatrS   r   r.   r$   r"   <module>r      sF       $ E D ( A @	 T]4(
sEz
:x1,gen ,Dn+genhuo n+h'GE&#+./ '0T78$ Tn6%GEN 6%r? @!WU^ !H &7?R[?eCJ'(? 	"? 
&*	?r$   