
    Vh'              	       L   d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z d dlmZ d dlmZmZ d dlmZ d dlmZmZ d dlmZmZmZmZmZ d d	lmZmZmZ d d
l m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) g dZ* G d de'      Z+ G d de      Z,dee-ej\                  f   dee-ej\                  f   fdZ/dee-ej\                  f   dee-ej\                  f   fdZ0e1dk(  r G d de      Z2 e jf                         Z4e4jk                  de-de2D  cg c]  } | jl                   c} e2jn                         e4jk                  de-d        e4jk                  d!e-d"        e4jq                         Z9 e:d#e9jv                   d$e9jx                   d%e9jz                   d&       d'e9jv                   d(Z>e9jz                  e2jn                  jl                  k(  rLej~                  j                  e9jv                        r e0e9jv                  e9jx                         y e:e>       ye9jz                  e2j                  jl                  k(  rLej~                  j                  e9jv                        r e/e9jv                  e9jx                         y e:e>       y eCd)e9jz                         yc c} w )*    N)Enum)castOptionalUnion)narrow_tensor_by_index)FileSystemReaderFileSystemWriter)flatten_state_dict)_EmptyStateDictLoadPlannerDefaultLoadPlanner)MetadataSTATE_DICT_TYPESTORAGE_TYPESTensorPropertiesTensorStorageMetadata)LoadItemTypeLoadPlanLoadPlanner)_create_chunk_list)_load_state_dict)_save_state_dict)StorageReader)Future)dcp_to_torch_savetorch_save_to_dcpBroadcastingTorchSaveReaderDynamicMetaLoadPlannerc                      e Zd ZdZ	 	 ddeeeej                  f      de	ddfdZ
defdZded	eded   fd
ZdededdfdZdedefdZdee   dee   fdZddeeej                  df   ddfdZedeeej                  f   defd       Zy)r   aI  
    StorageReader for reading a Torch Save file. This reader will read the entire checkpoint
    on the coordinator rank, and then broadcast and shard each tensor to all ranks.

    . N.B. Intended to be used with DynamicMetaLoadPlanner

    .. warning::
        Current implementation only supports loading Tensors.

    >>> # xdoctest: +SKIP("undefined vars")
    >>> sd = {"mode": model}
    >>> dcp.load(
    >>>    sd,
    >>>    storage_reader=BroadcastingTorchSaveReader(),
    >>>    planner=DynamicMetaLoadPlanner(),
    >>>    checkpoint_id="path_to_model.pt"
    >>> )
    Ncheckpoint_idcoordinator_rankreturnc                      || _         || _        y N)r   r    )selfr   r    s      Y/home/dcms/DCMS/lib/python3.12/site-packages/torch/distributed/checkpoint/format_utils.py__init__z$BroadcastingTorchSaveReader.__init__;   s    
 + 0    c                     t        i       S )zGExtends the default StorageReader to support building the metadata filestate_dict_metadata)r   )r$   s    r%   read_metadataz)BroadcastingTorchSaveReader.read_metadataC   s     B//r'   planplannerc           	         t        t        |      }| j                  rK| j                  J t	        j
                  | j                  dd      }|j                  rt        |      \  }}nd}|j                  D ]  }|j                  t        j                  k(  r9t        d|j                  j                   dt        |       j                   d      | j                  rGt        j                   j#                         }||j                  j                     j%                  |      }n6t	        j&                  |j(                  |j                  j                           }t        j*                  || j,                  d       t/        ||j0                  |j2                        }|j5                  |      j7                         }|j9                         |j9                         k(  s6J d	|j                   d
|j9                          d|j9                                 |j;                  |       |j=                  ||        t?               }	|	jA                  d       |	S )z
        Reads torch save data on the coordinator rank, and broadcast afterwards
        this incurrs a communication cost, but avoids having to load
        the entire checkpoint on each rank, hopefully preventing OOM issues
        NcpuF)map_locationweights_onlyNon-tensor value identified at . At this time  only supports loading Tensors.)srcasync_opzreq z mismatch sizes, z vs )!r   r   is_coordinatorr   torchloadr
   itemstyper   BYTE_IORuntimeErrorstorage_indexfqn__name__distdistributed_c10d_get_pg_default_deviceto
empty_like
state_dict	broadcastr    r   storage_offsetslengthsresolve_tensordetachsizecopy_commit_tensorr   
set_result)
r$   r,   r-   torch_state_dict_req	pg_devicetensortarget_tensorfuts
             r%   	read_dataz%BroadcastingTorchSaveReader.read_dataI   s    )73 %%111$zz""U  ))&89I&J# !#:: 	6Cxx<///"5c6G6G6K6K5L M$$(J$7$7#88WY  "" 11HHJ	)#*;*;*?*?@CCIN))'*<*<S=N=N=R=R*STNN6t'<'<uM+FC4G4GUF#2237>>@M %%'6;;=8 s(()): %%'(V[[]O=8 '!!#}5/	62 ht
r'   metadatar7   c                     || _         | j                   r#t        j                         | j                  k(  sJ | j                  J y*Implementation of the StorageReader methodN)r7   rA   get_rankr    r   )r$   rX   r7   s      r%   set_up_storage_readerz1BroadcastingTorchSaveReader.set_up_storage_reader|   s?    ,==?d&;&;;;;!!---r'   c                     |S r[    )r$   r,   s     r%   prepare_local_planz.BroadcastingTorchSaveReader.prepare_local_plan   s    r'   global_planc                     |S r_   r`   )r$   rb   s     r%   prepare_global_planz/BroadcastingTorchSaveReader.prepare_global_plan   s    r'   c                     || _         yrZ   )r   )r$   r   s     r%   resetz!BroadcastingTorchSaveReader.reset   s
    *r'   c                 @    t         j                  j                  |      S r_   )ospathisfile)clsr   s     r%   validate_checkpoint_idz2BroadcastingTorchSaveReader.validate_checkpoint_id   s     ww~~m,,r'   )Nr   r#   )r@   
__module____qualname____doc__r   r   strrh   PathLikeintr&   r   r+   r   r   r   rW   boolr]   ra   listrd   rf   classmethodrl   r`   r'   r%   r   r   '   s   * <@ !1c2;;&6 781 1 
	10x 01h 1 1 1f.h . .QU .x H tH~ $x. +5bkk4)?#@ +D + -5bkk9I3J -t - -r'   r   c            	       @     e Zd ZdZ	 	 ddedee   deddf fdZ xZ	S )	r   a  
    Extension of DefaultLoadPlanner, which creates a new Metadata object based on the passed in state dict,
    avoiding the need to read metadata from disk. This is useful when reading formats which don't have a
    metadata file, like Torch Save files.

    . N.B. Intended to be used with BroadcastingTorchSaveReader

    .. warning::
        Current implementation only supports loading Tensors.

    >>> # xdoctest: +SKIP("undefined vars")
    >>> sd = {"mode": model}
    >>> dcp.load(
    >>>    sd,
    >>>    storage_reader=BroadcastingTorchSaveReader(),
    >>>    planner=DynamicMetaLoadPlanner(),
    >>>    checkpoint_id="path_to_model.pt"
    >>> )
    NrF   rX   r7   r!   c           	      |   t         |   |||       i }| j                  j                         D ]z  \  }}t	        j
                  |      s%t        d| dt        |       j                   d      t        t        |j                        |j                         t        |            ||<   | t        |      | _        y)zdSetups of the planner, extnding default behavior by creating the Metadata object from the state dictr2   r3   r4   )dtyper)   N)superset_up_plannerrF   r:   r8   	is_tensorr=   r;   r@   r   r   rx   rL   r   r   rX   )r$   rF   rX   r7   r*   keyrT   	__class__s          r%   rz   z%DynamicMetaLoadPlanner.set_up_planner   s     	z8^D8:??002 	KC??6*"5cU ;$$(J$7$7#88WY 
 (= v||4"6*($	 !5HIr'   )NF)
r@   rm   rn   ro   r   r   r   rs   rz   __classcell__)r}   s   @r%   r   r      sK    . (,$	J#J 8$J 	J
 
J Jr'   r   dcp_checkpoint_dirtorch_save_pathc                 t    i }t        |t        |       t               d       t        j                  ||       y)aq  
    Given a directory containing a DCP checkpoint, this function will convert it into a
    Torch save file.

    Args:
        dcp_checkpoint_dir: Directory containing the DCP checkpoint.
        torch_save_path: Filename to store the converted Torch save file.

    .. warning::
        To avoid OOM, it's recommended to only run this function on a single rank.
    T)storage_readerr-   no_distN)r   r   r   r8   save)r   r   sds      r%   r   r      s6     B
'(:;*,	 
JJr?#r'   c                 `    t        j                  | d      }t        |t        |      d       y)aB  
    Given the location of a torch save file, converts it into a DCP checkpoint.

    Args:
        torch_save_path: Filename of the Torch save file.
        dcp_checkpoint_dir: Directory to store the DCP checkpoint.

    .. warning::
        To avoid OOM, it's recommended to only run this function on a single rank.
    F)r1   T)storage_writerr   N)r8   r9   r   r	   )r   r   rF   s      r%   r   r      s-     O%@J #34F#GQUr'   __main__c                       e Zd ZdZdZy)
FormatModetorch_to_dcpdcp_to_torchN)r@   rm   rn   TORCH_TO_DCPDCP_TO_TORCHr`   r'   r%   r   r      s    %%r'   r   modezConversion mode)r;   helpchoicesdefaultr5   zPath to the source model)r;   r   dstzPath to the destination modelzConverting checkpoint from z to z using method: ''zNo checkpoint found at z. Skipping conversion.zUnknown conversion mode: )Dargparserh   enumr   typingr   r   r   r8   torch.distributeddistributedrA   torch.distributed._shard._utilsr   torch.distributed.checkpointr   r	   )torch.distributed.checkpoint._nested_dictr
   ,torch.distributed.checkpoint.default_plannerr   r   %torch.distributed.checkpoint.metadatar   r   r   r   r   $torch.distributed.checkpoint.plannerr   r   r   ,torch.distributed.checkpoint.planner_helpersr   .torch.distributed.checkpoint.state_dict_loaderr   -torch.distributed.checkpoint.state_dict_saverr   $torch.distributed.checkpoint.storager   torch.futuresr   __all__r   r   rp   rq   r   r   r@   r   ArgumentParserparseradd_argumentvaluer   
parse_argsargsprintr5   r   r   checkpoint_missing_warningri   rj   r   isdir
ValueError)ms   0r%   <module>r      se    	  ( (    B K H  U T K K J >  l-- l-^+J/ +J\$c2;;./$3+,$23+,c2;;./. z&T &
 %X$$&F
",-Q-''   C.HI
C.MND	
%dhhZtDHH:=Mdii[XYZ "$((+AB  yyJ++11177>>$((#dhh1,-	j--33	377=="dhh1,-4TYYK@AAI  .s   J!