
import warnings
from collections import defaultdict
from collections.abc import Iterator, Sized
from typing import Any, Callable, Optional, TypeVar

import torch.utils.data.datapipes.iter.sharding
from torch.utils.data.datapipes._decorator import functional_datapipe
from torch.utils.data.datapipes.datapipe import DataChunk, IterDataPipe
from torch.utils.data.datapipes.utils.common import _check_unpickable_fn


__all__ = [
    "BatcherIterDataPipe",
    "GrouperIterDataPipe",
    "UnBatcherIterDataPipe",
]


_T_co = TypeVar("_T_co", covariant=True)


def __getattr__(name: str):
    # Deprecation shim: the sharding-related names used to live in this module.
    if name in ["SHARDING_PRIORITIES", "ShardingFilterIterDataPipe"]:
        warnings.warn(
            f"`{name}` from `torch.utils.data.datapipes.iter.grouping` is going to be removed in PyTorch 2.1. "
            f"Please use `{name}` from `torch.utils.data.datapipes.iter.sharding` instead.",
            category=FutureWarning,
            stacklevel=2,
        )
        return getattr(torch.utils.data.datapipes.iter.sharding, name)

    raise AttributeError(f"module {__name__} has no attribute {name}")


@functional_datapipe("batch")
class BatcherIterDataPipe(IterDataPipe[DataChunk]):
    r"""
    Creates mini-batches of data (functional name: ``batch``).

    An outer dimension will be added as ``batch_size``: every batch holds exactly ``batch_size`` elements when
    ``drop_last`` is set to ``True``, while the last batch may hold only ``length % batch_size`` elements when
    ``drop_last`` is set to ``False``.

    Args:
        datapipe: Iterable DataPipe being batched
        batch_size: The size of each batch
        drop_last: Option to drop the last batch if it's not full
        wrapper_class: wrapper to apply onto each batch (type ``List``) before yielding,
            defaults to ``DataChunk``

    Example:
        >>> # xdoctest: +SKIP
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> dp = IterableWrapper(range(10))
        >>> dp = dp.batch(batch_size=3, drop_last=True)
        >>> list(dp)
        [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
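        >>> # With the default ``drop_last=False``, the smaller final batch is kept
        >>> dp2 = IterableWrapper(range(10)).batch(batch_size=3)
        >>> list(dp2)
        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]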
    """

    datapipe: IterDataPipe
    batch_size: int
    drop_last: bool

    def __init__(
        self,
        datapipe: IterDataPipe,
        batch_size: int,
        drop_last: bool = False,
        wrapper_class: type[DataChunk] = DataChunk,
    ) -> None:
        assert batch_size > 0, "Batch size is required to be larger than 0!"
        super().__init__()
        self.datapipe = datapipe
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.wrapper_class = wrapper_class

    def __iter__(self) -> Iterator[DataChunk]:
        batch: list = []
        for x in self.datapipe:
            batch.append(x)
            if len(batch) == self.batch_size:
                yield self.wrapper_class(batch)
                batch = []
        # Flush the final, possibly smaller batch unless drop_last is set.
        if len(batch) > 0 and not self.drop_last:
            yield self.wrapper_class(batch)

    def __len__(self) -> int:
        if isinstance(self.datapipe, Sized):
            if self.drop_last:
                return len(self.datapipe) // self.batch_size
            # Round up so the trailing partial batch is counted.
            return (len(self.datapipe) + self.batch_size - 1) // self.batch_size
        raise TypeError(f"{type(self).__name__} instance doesn't have valid length")


@functional_datapipe("unbatch")
class UnBatcherIterDataPipe(IterDataPipe):
    r"""
    Undoes batching of data (functional name: ``unbatch``).

    In other words, it flattens the data up to the specified level within a batched DataPipe.

    Args:
        datapipe: Iterable DataPipe being un-batched
        unbatch_level: Defaults to ``1`` (only flattening the top level). If set to ``2``,
            it will flatten the top two levels, and ``-1`` will flatten the entire DataPipe.

    Example:
        >>> # xdoctest: +SKIP
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> source_dp = IterableWrapper([[[0, 1], [2]], [[3, 4], [5]], [[6]]])
        >>> dp1 = source_dp.unbatch()
        >>> list(dp1)
        [[0, 1], [2], [3, 4], [5], [6]]
        >>> dp2 = source_dp.unbatch(unbatch_level=2)
        >>> list(dp2)
        [0, 1, 2, 3, 4, 5, 6]
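        >>> # ``unbatch_level=-1`` flattens the DataPipe completely, whatever the nesting depth
        >>> dp3 = source_dp.unbatch(unbatch_level=-1)
        >>> list(dp3)
        [0, 1, 2, 3, 4, 5, 6]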
    """

    def __init__(self, datapipe: IterDataPipe, unbatch_level: int = 1):
        self.datapipe = datapipe
        self.unbatch_level = unbatch_level

    def __iter__(self):
        for element in self.datapipe:
            yield from self._dive(element, unbatch_level=self.unbatch_level)

    def _dive(self, element, unbatch_level: int):
        if unbatch_level < -1:
            raise ValueError("unbatch_level must be -1 or >= 0")
        if unbatch_level == -1:
            # Flatten recursively until non-list/DataChunk elements are reached.
            if isinstance(element, (list, DataChunk)):
                for item in element:
                    yield from self._dive(item, unbatch_level=-1)
            else:
                yield element
        elif unbatch_level == 0:
            yield element
        else:
            if isinstance(element, (list, DataChunk)):
                for item in element:
                    yield from self._dive(item, unbatch_level=unbatch_level - 1)
            else:
                raise IndexError(
                    f"unbatch_level {self.unbatch_level} exceeds the depth of the DataPipe"
                )


@functional_datapipe("groupby")
class GrouperIterDataPipe(IterDataPipe[DataChunk]):
    r"""
    Groups data from IterDataPipe by keys from ``group_key_fn``, yielding a ``DataChunk`` with batch size up to ``group_size``.

    (functional name: ``groupby``).

    The samples are read sequentially from the source ``datapipe``, and a batch of samples belonging to the same group
    will be yielded as soon as the size of the batch reaches ``group_size``. When the buffer is full,
    the DataPipe will yield the largest batch with the same key, provided that its size is at least
    ``guaranteed_group_size``. If its size is smaller, it will be dropped if ``drop_remaining=True``.

    After iterating through the entirety of source ``datapipe``, everything not dropped due to the buffer capacity
    will be yielded from the buffer, even if the group sizes are smaller than ``guaranteed_group_size``.

    Args:
        datapipe: Iterable datapipe to be grouped
        group_key_fn: Function used to generate group key from the data of the source datapipe
        keep_key: Option to yield the matching key along with the items in a tuple,
            resulting in `(key, [items])` otherwise returning [items]
        buffer_size: The size of buffer for ungrouped data
        group_size: The max size of each group, a batch is yielded as soon as it reaches this size
        guaranteed_group_size: The guaranteed minimum group size to be yielded in case the buffer is full
        drop_remaining: Specifies if the group smaller than ``guaranteed_group_size`` will be dropped from buffer
            when the buffer is full

    Example:
        >>> import os
        >>> # xdoctest: +SKIP
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> def group_fn(file):
        ...     return os.path.basename(file).split(".")[0]
        >>> source_dp = IterableWrapper(["a.png", "b.png", "a.json", "b.json", "a.jpg", "c.json"])
        >>> dp0 = source_dp.groupby(group_key_fn=group_fn)
        >>> list(dp0)
        [['a.png', 'a.json', 'a.jpg'], ['b.png', 'b.json'], ['c.json']]
        >>> # A group is yielded as soon as its size reaches `group_size`
        >>> dp1 = source_dp.groupby(group_key_fn=group_fn, group_size=2)
        >>> list(dp1)
        [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
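        >>> # ``keep_key=True`` pairs every group with the key returned by ``group_key_fn``
        >>> dp_k = source_dp.groupby(group_key_fn=group_fn, keep_key=True)
        >>> list(dp_k)
        [('a', ['a.png', 'a.json', 'a.jpg']), ('b', ['b.png', 'b.json']), ('c', ['c.json'])]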
        >>> # Scenario where `buffer` is full, and group 'a' needs to be yielded since its size reaches `guaranteed_group_size`
        >>> dp2 = source_dp.groupby(group_key_fn=group_fn, buffer_size=3, group_size=3, guaranteed_group_size=2)
        >>> list(dp2)
        [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
    """

    def __init__(
        self,
        datapipe: IterDataPipe[_T_co],
        group_key_fn: Callable[[_T_co], Any],
        *,
        keep_key: bool = False,
        buffer_size: int = 10000,
        group_size: Optional[int] = None,
        guaranteed_group_size: Optional[int] = None,
        drop_remaining: bool = False,
    ):
        _check_unpickable_fn(group_key_fn)
        self.datapipe = datapipe
        self.group_key_fn = group_key_fn

        self.keep_key = keep_key
        self.max_buffer_size = buffer_size
        self.buffer_elements: defaultdict[Any, list] = defaultdict(list)
        self.curr_buffer_size = 0
        self.group_size = group_size
        self.guaranteed_group_size = None
        if group_size is not None and buffer_size is not None:
            assert 0 < group_size <= buffer_size
            self.guaranteed_group_size = group_size
        if guaranteed_group_size is not None:
            assert group_size is not None and 0 < guaranteed_group_size <= group_size
            self.guaranteed_group_size = guaranteed_group_size
        self.drop_remaining = drop_remaining
        self.wrapper_class = DataChunk

    def _remove_biggest_key(self):
        # Called when the buffer is full: evict the key with the most buffered items.
        biggest_key = None
        biggest_size = 0
        result_to_yield = None
        for findkey in self.buffer_elements.keys():
            if len(self.buffer_elements[findkey]) > biggest_size:
                biggest_size = len(self.buffer_elements[findkey])
                biggest_key = findkey

        if (
            self.guaranteed_group_size is not None
            and biggest_size < self.guaranteed_group_size
            and not self.drop_remaining
        ):
            raise RuntimeError(
                "Failed to group items", str(self.buffer_elements[biggest_key])
            )

        if (
            self.guaranteed_group_size is None
            or biggest_size >= self.guaranteed_group_size
        ):
            result_to_yield = self.buffer_elements[biggest_key]

        self.curr_buffer_size -= biggest_size
        del self.buffer_elements[biggest_key]

        return result_to_yield

    def __iter__(self):
        for x in self.datapipe:
            key = self.group_key_fn(x)

            self.buffer_elements[key].append(x)
            self.curr_buffer_size += 1

            if self.group_size is not None and self.group_size == len(
                self.buffer_elements[key]
            ):
                result: DataChunk[Any] = self.wrapper_class(self.buffer_elements[key])
                yield (key, result) if self.keep_key else result
                self.curr_buffer_size -= len(self.buffer_elements[key])
                del self.buffer_elements[key]

            if self.curr_buffer_size == self.max_buffer_size:
                result_to_yield = self._remove_biggest_key()
                if result_to_yield is not None:
                    result = self.wrapper_class(result_to_yield)
                    yield (key, result) if self.keep_key else result

        # Flush whatever is left in the buffer once the source is exhausted.
        for key in tuple(self.buffer_elements.keys()):
            result = self.wrapper_class(self.buffer_elements.pop(key))
            self.curr_buffer_size -= len(result)
            yield (key, result) if self.keep_key else result

    def reset(self) -> None:
        self.curr_buffer_size = 0
        self.buffer_elements = defaultdict(list)

    def __getstate__(self):
        state = (
            self.datapipe,
            self.group_key_fn,
            self.keep_key,
            self.max_buffer_size,
            self.group_size,
            self.guaranteed_group_size,
            self.drop_remaining,
            self.wrapper_class,
            self._valid_iterator_id,
            self._number_of_samples_yielded,
        )
        if IterDataPipe.getstate_hook is not None:
            return IterDataPipe.getstate_hook(state)
        return state

    def __setstate__(self, state):
        (
            self.datapipe,
            self.group_key_fn,
            self.keep_key,
            self.max_buffer_size,
            self.group_size,
            self.guaranteed_group_size,
            self.drop_remaining,
            self.wrapper_class,
            self._valid_iterator_id,
            self._number_of_samples_yielded,
        ) = state
        self.curr_buffer_size = 0
        self.buffer_elements = defaultdict(list)

    def __del__(self):
        self.buffer_elements.clear()