
    Vh<?                        d dl Z d dlmZmZ d dlZd dlmZ d dlmZm	Z	 g dZ
 e j                  e      Z	 dZ G d d      Z G d	 d
e      Z e ej"                  d      d       Zd Z G d d      Z G d d      Zd Z	 	 ddeedf   deeeef      dedeeedf      deeeef      deee   ee   f   fdZdee   fdZy)    N)AnyOptionalmap_aggregate)tree_flattentree_unflatten)TensorChunkSpecsplit_args_kwargs_into_chunksmerge_chunksFc                       e Zd ZdZd Zy)_CustomReducera$  
    Custom reducer class that can be used to specify a custom operation that
    reduces losses of multiple microbatches into one value.

    Example:
    >>> # xdoctest: +SKIP
    >>> sum_reducer = _CustomReducer(
    >>>     torch.tensor(0.0),
    >>>     lambda a, b: a + b
    >>> )
    c                      || _         || _        y N)
init_value	reduce_fn)selfr   r   s      W/home/dcms/DCMS/lib/python3.12/site-packages/torch/distributed/pipelining/microbatch.py__init__z_CustomReducer.__init__(   s    $"    N)__name__
__module____qualname____doc__r    r   r   r   r      s    
#r   r   c                       e Zd Zy)_LossReducerNr   r   r   r   r   r   r   r   -       r   r   g        c                     | |z   S r   r   )abs     r   <lambda>r"   1   s
    1q5 r   c                   n    e Zd ZU dZd Zeed<   d Zd Ze	de
edf   fd       Ze	deeef   fd	       Zy
)r	   z2
    Class used to specify chunking of inputs
    c                     || _         y r   	split_dim)r   r&   s     r   r   zTensorChunkSpec.__init__=   s	    "r   r&   c                 |    | j                   j                   d| j                   j                   d| j                   dS )N.())	__class__r   r   r&   r   s    r   __repr__zTensorChunkSpec.__repr__B   s9    ~~(()4>>+B+B*C1T^^DTTUV	
r   c                 "    d| j                    dS )NzTensorChunkSpec(r*   r%   r,   s    r   __str__zTensorChunkSpec.__str__G   s    !$..!133r   
chunk_dims.c                      t        | d       }|S )a  
        A helper for creating a tuple of `TensorChunkSpec` from a tuple of chunk
        dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # There are three positional arguments to the model, and
            >>> # we are chunking them along dimension 0, 0 and 1, respectively
            >>> args_chunk_spec = TensorChunkSpec.from_tuple((0, 0, 1))
        c                     t        |       S r   r	   dims    r   r"   z,TensorChunkSpec.from_tuple.<locals>.<lambda>Y       , r   r   )r0   args_chunk_specs     r   
from_tuplezTensorChunkSpec.from_tupleJ   s     (,
 r   c                      t        | d       }|S )a\  
        A helper for creating a dictionary of `TensorChunkSpec` from a
        dictionary of chunk dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # Chunk dimension 0 for the "id" argument, 1 for the "mask" argument
            >>> kwargs_chunk_spec = TensorChunkSpec.from_dict({"id": 0, "mask": 1})
        c                     t        |       S r   r3   r4   s    r   r"   z+TensorChunkSpec.from_dict.<locals>.<lambda>k   r6   r   r   )r0   kwargs_chunk_specs     r   	from_dictzTensorChunkSpec.from_dict]   s     *,
 ! r   N)r   r   r   r   r   int__annotations__r-   r/   staticmethodtupler8   dictstrr<   r   r   r   r	   r	   8   se    # N

4 #s(O $ !cN! !r   r	   c                       e Zd Zy)
_ReplicateNr   r   r   r   rD   rD   q   r   r   rD   c                    i }g }|}d}t        |       t        |      k(  s;J dt        | j                                dt        |j                                       | j                         D ]G  \  }}t	        |      \  }	}
|j                  |
       ||   }|J t	        |      \  }}t        |	      t        |      k7  rt        d| d|       g }t        |	|      D ]  \  }}|t        u st        |t        j                        s|j                  |g|z         ?t        |t              rqt        |t        j                        s
J | d       |j                  |j                        }||k  r9|r"t        j!                  d| d	| d
| d       |}nt#        d| d| d| d      t        j$                  |||j                        }t&        rg }d}|D ]  }t        j(                  |      }||j                  |j                        z   }t+        ddd      g|j,                  z  }t+        ||      ||j                  <   |||<   |j                  |       ||j                  |j                        z  } |j                  |       n|j                  |       d}t/        d|        |||<   J g }t1        |      D ]D  }i }|j                         D ]  \  }}|D cg c]  }||   	 }}|||<    |j                  |       F g }|D ]b  } i }!t        |      t        |       k(  sJ t        | j                         |      D ]  \  \  }}}"t3        ||"      |!|<    |j                  |!       d |S c c}w )aW  
    Given a dictionary of args, and a dictionary of chunking specs, shard the
    args according to the chunking specs.

    Args:
        args_dict: Dictionary of args
        args_chunk_spec: Dictionary of chunking specs
        num_chunks: Number of chunks to shard the args into

    Returns:
        args_split: List of sharded args
    Tzargs_dict.keys() = z args_chunk_spec.keys() = NzArgument value z9 did not have the same number of values as as chunk spec z is not a tensorz%Tensor size on chunking dimension is z', downsizing the number of chunks from z to r(   zArg z% on chunking dimension has a size of z$, smaller than the number of chunks z. PiPPy cannot reduce the number of chunks because other arguments have bigger chunk-dimension sizes. Please adjust your num_chunks setting.r   FzUnrecognized chunk spec: )lenlistkeysitemsr   append
ValueErrorziprD   
isinstancetorchTensorr	   sizer&   loggerwarningRuntimeErrortensor_split_debug_mask_minibatches
zeros_likeslicendim	TypeErrorranger   )#	args_dictr7   
num_chunksargs_sharded_replicated	arg_specsreal_num_chunksfirst_tensorarg_keyargflatspec
chunk_specchunk_spec_flat_sharded_arg_flatvchunk_vv_split_dim_sizechunk_tensorsexpanded_chunkssplit_dim_idxchunk_tensornew_val	upper_idxslice_indiceschunks_flat	chunk_idx
chunk_argskeyv_flatarg_single_chunk
args_splitchunkper_chunk_argsarg_specs#                                      r   _shard_dict_of_argsr}   u   s   ( !I OLy>S11 
d9>>#3455OPTUdUiUiUkPlOmn1 ") I<!#&
d$W-
%%%)*5t9O,,!# '++5,8 
 dO4 8	GJAw*$Jq%,,,G ''o(=>G_5 "!U\\2Jqc9I4JJ2#$66'*;*;#< #o5# CDTCU VDDN<tTdSeefh +;*"7)+PQaPb cAAK MEE  !& 2 2(9(9! +&(O$%M(5 N"'"2"21"5$1L4E4EgFWFW4X$X	).tT4)@(AGLL(P;@)9<g&7&78 2>.'..w7%):):7;L;L)MMN %++O<$++M:$";G9 EFFq8	Gt ,<(SI<X K?+ '	
/557 	/HC@CDfy 1DD.JsO	/ 	:&' J *9~U+++$'y$A 	@ JS#"0h"?N3	@.)*   Es   "M5args.kwargschunksr7   r;   returnc                    |i }|t        t              ft        |       z  }|#t        j	                  |t        t                    }t        t        t        |             t        t        |            |      }t        |      }t        |||      }t        |      |k  r<t        |      }t        t        t        |             t        t        |            |      }t        |      t        |      k7  r#t        dt        |       dt        |             |D cg c](  t        fdt        t                    D              * }	}|	|fS c c}w )a  
    Given a sequence of args and kwargs, split them into a number of chunks
    according to  their respective chunking specs.

    Args:
        args: Tuple of args
        kwargs: Dict of kwargs
        chunks: Number of chunks to split the args and kwargs into
        args_chunk_spec: chunking specs for args, in same shape as args
        kwargs_chunk_spec: chunking specs for kwargs, in same shape as kwargs

    Returns:
        args_split: List of sharded args
        kwargs_split: List of sharded kwargs
    z;args and kwargs are split into different number of chunks: z, c              3   (   K   | ]	  }|     y wr   r   ).0iru   s     r   	<genexpr>z0split_args_kwargs_into_chunks.<locals>.<genexpr>V  s     <jm<s   )
r	   DEFAULT_CHUNK_DIMrF   rA   fromkeysr}   	enumeraterS   r@   rZ   )
r~   r   r   r7   r;   args_split_dictr_   kwargs_splitru   ry   s
           ` r   r
   r
      sW   p ~ *+<=?#d)K  MM&/BS2TU)Yt_Y'(O
 /*O&L <?* l+-4!?+,
 ?s<00I?#$Bs<'8&9;
 	
 * 	<U3z?%;<<J 
 |##s   -Ec                    |t        |      \  }}n-t        | d         \  }}t        t              gt        |      z  }g }| D ]I  }t        |      \  }}t        |      t        |      k7  rt	        d| d|       |j                  |       K g }	t        |      D ]  \  }
}t        |t              rft        t        |            D cg c]
  }||   |
    }}t        r|d   j                  }|dd D ]  }|j                  |k(  rJ  t        j                  t        j                  |ddit        |      |j                        }g }d}t        |      t        |      k(  sJ t        ||      D ]o  \  }}||j!                  |j                        z   }t#        ddd      g|j$                  z  }t#        ||      ||j                  <   ||   }|j                  |       |}q n|}|	j                  t        j&                  ||j                  	             ~t        |t(              rP|j*                  }t        t        |            D ]  }|j-                  |||   |
         } |	j                  |       |d   |
   }t        dt        |            D ]  }||   |
   |k(  rJ  |	j                  |         t/        |	|      S c c}w )
z
    Given a list of chunks, merge them into a single value according to
    the chunk spec.

    Args:
        chunks: list of chunks
        chunk_spec: Chunking spec for the chunks

    Returns:
        value: Merged value
    Nr   zChunk z did not match chunk spec    devicemeta)sectionsr5   r4   )r   r	   r   rF   rK   rJ   r   rM   rZ   rU   shaperN   rT   emptyr&   rL   rP   rW   rX   catr   r   r   r   )r   re   spec_flattenedflatten_specchunk0_flatchunks_flattenedrz   chunk_flattenedrg   args_flattenedarg_idxrb   rt   partial_valuesoverall_shapevalmeta_chunksvalues_to_catchunk_start_idxpartial_value
meta_chunkchunk_end_idxrr   slicedreduced_valvalues                             r   r   r   ]  s   Z '3J'?$ %1$;!\)*;<=K@PP  1)%03~#66veW,FzlSTT01 N!.1 0)c?+ "'s+;'<!= !+G4N 
 ' .q 1 7 7)!"- 6C995556#00KK>v> 0 !#"#>*c+.>>>>14^[1Q 4-M:$3joocmm6T$TM%*4t%<$=@R@R$RM38-3XM#--0*=9F!((0&3O4 !/!!%))Ms}}"MN^,..K"3'7#89 	!mm!1)!<W!E
 !!+.$Q'0E"1c*:&;< E	'	27;uDDDE!!%(a0)f .,77cs   
K	)NN)loggingtypingr   r   rN   torch.fx.noder   torch.utils._pytreer   r   __all__	getLoggerr   rQ   rU   r   r   tensorsum_reducerr   r	   rD   r}   r@   rA   rB   r=   rG   r
   r   r   r   r   <module>r      s8       ' < 
		8	$
   # #$	> 	 <5<<,.@A  5! 5!r	 	|F >B>Bf$
S/f$T#s(^$f$ f$ eOS$89:	f$
  S/%9 :;f$ 4;T
"#f$Rw8Iw8r   