
    1Vh(                     H    d Z ddlZddlZddlZ G d d      Z G d d      Zy)zEA key-value[] store that implements reservoir sampling on the values.    Nc                   8    e Zd ZdZd	dZd Zd Zd fdZd
dZy)	Reservoira  A map-to-arrays container, with deterministic Reservoir Sampling.

    Items are added with an associated key. Items may be retrieved by key, and
    a list of keys can also be retrieved. If size is not zero, then it dictates
    the maximum number of items that will be stored with each key. Once there are
    more items for a given key, they are replaced via reservoir sampling, such
    that each item has an equal probability of being included in the sample.

    Deterministic means that for any given seed and bucket size, the sequence of
    values that are kept for any given tag will always be the same, and that this
    is independent of any insertions on other tags. That is:

    >>> separate_reservoir = reservoir.Reservoir(10)
    >>> interleaved_reservoir = reservoir.Reservoir(10)
    >>> for i in range(100):
    >>>   separate_reservoir.AddItem('key1', i)
    >>> for i in range(100):
    >>>   separate_reservoir.AddItem('key2', i)
    >>> for i in range(100):
    >>>   interleaved_reservoir.AddItem('key1', i)
    >>>   interleaved_reservoir.AddItem('key2', i)

    separate_reservoir and interleaved_reservoir will be in identical states.

    See: https://en.wikipedia.org/wiki/Reservoir_sampling

    Adding items has amortized O(1) runtime.

    Fields:
      always_keep_last: Whether the latest seen sample is always at the
        end of the reservoir. Defaults to True.
      size: An integer of the maximum number of samples.
    c                     dk  st              k7  rt        dz        t        j                  fd      | _        t        j                         | _        | _        | _	        y)a4  Creates a new reservoir.

        Args:
          size: The number of values to keep in the reservoir for each tag. If 0,
            all values will be kept.
          seed: The seed of the random number generator to use when sampling.
            Different values for |seed| will produce different samples from the same
            input items.
          always_keep_last: Whether to always keep the latest seen item in the
            end of the reservoir. Defaults to True.

        Raises:
          ValueError: If size is negative or not an integer.
        r   z(size must be nonnegative integer, was %sc                  D    t        t        j                               S N)_ReservoirBucketrandomRandom)always_keep_lastseedsizes   ^/home/dcms/DCMS/lib/python3.12/site-packages/tensorboard/backend/event_processing/reservoir.py<lambda>z$Reservoir.__init__.<locals>.<lambda>M   s    $fmmD)+;     N)
round
ValueErrorcollectionsdefaultdict_buckets	threadingLock_mutexr   r   )selfr   r   r   s    ```r   __init__zReservoir.__init__;   s^     !8tuT{*G$NOO#//
  nn&	 0r   c                     | j                   5  t        | j                  j                               cddd       S # 1 sw Y   yxY w)zsReturn all the keys in the reservoir.

        Returns:
          ['list', 'of', 'keys'] in the Reservoir.
        N)r   listr   keysr   s    r   KeyszReservoir.KeysW   s5     [[ 	.**,-	. 	. 	.s	   #:Ac                     | j                   5  || j                  vrt        d|z        | j                  |   }ddd       |j                         S # 1 sw Y   j                         S xY w)a  Return items associated with given key.

        Args:
          key: The key for which we are finding associated items.

        Raises:
          KeyError: If the key is not found in the reservoir.

        Returns:
          [list, of, items] associated with that key.
        z!Key %s was not found in ReservoirN)r   r   KeyErrorItems)r   keybuckets      r   r"   zReservoir.Items`   sb     [[ 	($--'BSHII]]3'F	( ||~		( ||~s   ,AA)c                     | S r    xs    r   r   zReservoir.<lambda>r   s    Q r   c                     | j                   5  | j                  |   }ddd       j                  ||       y# 1 sw Y   xY w)a  Add a new item to the Reservoir with the given tag.

        If the reservoir has not yet reached full size, the new item is guaranteed
        to be added. If the reservoir is full, then behavior depends on the
        always_keep_last boolean.

        If always_keep_last was set to true, the new item is guaranteed to be added
        to the reservoir, and either the previous last item will be replaced, or
        (with low probability) an older item will be replaced.

        If always_keep_last was set to false, then the new item will replace an
        old item with low probability.

        If f is provided, it will be applied to transform item (lazily, iff item is
          going to be included in the reservoir).

        Args:
          key: The key to store the item under.
          item: The item to add to the reservoir.
          f: An optional function to transform the item prior to addition.
        N)r   r   AddItem)r   r#   itemfr$   s        r   r*   zReservoir.AddItemr   s<    , [[ 	(]]3'F	(tQ	( 	(s	   8ANc                 "   | j                   5  |r?|| j                  v r'| j                  |   j                        cddd       S 	 ddd       yt        fd| j                  j	                         D              cddd       S # 1 sw Y   yxY w)aD  Filter items within a Reservoir, using a filtering function.

        Args:
          filterFn: A function that returns True for the items to be kept.
          key: An optional bucket key to filter. If not specified, will filter all
            all buckets.

        Returns:
          The number of items removed.
        Nr   c              3   @   K   | ]  }|j                          y wr   )FilterItems).0r$   filterFns     r   	<genexpr>z(Reservoir.FilterItems.<locals>.<genexpr>   s$       &&x0s   )r   r   r/   sumvalues)r   r1   r#   s    ` r   r/   zReservoir.FilterItems   s     [[ 
	$--'==-99(C
	 
	
 
	 
	  "&--"6"6"8 
	 
	 
	s   .BB+BB)r   Tr   )	__name__
__module____qualname____doc__r   r   r"   r*   r/   r&   r   r   r   r      s&     D18.$ $/  4r   r   c                   0    e Zd ZdZddZd fdZd Zd Zy)	r   zA container for items from a stream, that implements reservoir sampling.

    It always stores the most recent item as its final item.
    Nc                    |dk  s|t        |      k7  rt        d|z        g | _        t        j                         | _        || _        d| _        ||| _        || _        yt        j                  d      | _        || _        y)a  Create the _ReservoirBucket.

        Args:
          _max_size: The maximum size the reservoir bucket may grow to. If size is
            zero, the bucket has unbounded size.
          _random: The random number generator to use. If not specified, defaults to
            random.Random(0).
          always_keep_last: Whether the latest seen item should always be included
            in the end of the bucket.

        Raises:
          ValueError: if the size is not a nonnegative integer.
        r   z)_max_size must be nonnegative int, was %sN)r   r   itemsr   r   r   	_max_size_num_items_seen_randomr	   r
   r   )r   r<   r>   r   s       r   r   z_ReservoirBucket.__init__   s     q=Iy)99;iG  
  nn&" "DL !1 "==+DL 0r   c                     | S r   r&   r'   s    r   r   z_ReservoirBucket.<lambda>   s     r   c                 <   | j                   5  t        | j                        | j                  k  s| j                  dk(  r"| j                  j	                   ||             n| j
                  j                  d| j                        }|| j                  k  r=| j                  j                  |       | j                  j	                   ||             n!| j                  r ||      | j                  d<   | xj                  dz  c_        ddd       y# 1 sw Y   yxY w)a;  Add an item to the ReservoirBucket, replacing an old item if
        necessary.

        The new item is guaranteed to be added to the bucket, and to be the last
        element in the bucket. If the bucket has reached capacity, then an old item
        will be replaced. With probability (_max_size/_num_items_seen) a random item
        in the bucket will be popped out and the new item will be appended
        to the end. With probability (1 - _max_size/_num_items_seen)
        the last item in the bucket will be replaced.

        Since the O(n) replacements occur with O(1/_num_items_seen) likelihood,
        the amortized runtime is O(1).

        Args:
          item: The item to add to the bucket.
          f: A function to transform item before addition, if it will be kept in
            the reservoir.
        r      N)
r   lenr;   r<   appendr>   randintr=   popr   )r   r+   r,   rs       r   r*   z_ReservoirBucket.AddItem   s    & [[ 
	&4::/4>>Q3F

!!!D'*LL((D,@,@At~~%JJNN1%JJ%%ag.**%&tWDJJrN  A% 
	& 
	& 
	&s   C<DDc                    | j                   5  t        | j                        }t        t	        || j                              | _        |t        | j                        z
  }|dkD  r!t        | j                        t        |      z  nd}t        t        | j                  |z              | _        |cddd       S # 1 sw Y   yxY w)a  Filter items in a ReservoirBucket, using a filtering function.

        Filtering items from the reservoir bucket must update the
        internal state variable self._num_items_seen, which is used for determining
        the rate of replacement in reservoir sampling. Ideally, self._num_items_seen
        would contain the exact number of items that have ever seen by the
        ReservoirBucket and satisfy filterFn. However, the ReservoirBucket does not
        have access to all items seen -- it only has access to the subset of items
        that have survived sampling (self.items). Therefore, we estimate
        self._num_items_seen by scaling it by the same ratio as the ratio of items
        not removed from self.items.

        Args:
          filterFn: A function that returns True for items to be kept.

        Returns:
          The number of items removed from the bucket.
        r   N)	r   rC   r;   r   filterfloatintr   r=   )r   r1   size_before	size_diffprop_remainings        r   r/   z_ReservoirBucket.FilterItems   s    & [[ 	djj/KfXtzz:;DJ#c$**o5I 9DaDJJ%"44Q  $'d**^;<$D  	 	 	s   B!B88Cc                 p    | j                   5  t        | j                        cddd       S # 1 sw Y   yxY w)z Get all the items in the bucket.N)r   r   r;   r   s    r   r"   z_ReservoirBucket.Items  s*    [[ 	$

#	$ 	$ 	$s   ,5)NT)r5   r6   r7   r8   r   r*   r/   r"   r&   r   r   r   r      s!    
1< * &>B$r   r   )r8   r   r	   r   r   r   r&   r   r   <module>rP      s/     L   I IXg$ g$r   