
    Vhx                       d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ ddlmZmZ ddlmZmZmZmZmZmZ dd	lmZ d
dlmZ d
dlmZ ej>                  j@                  Z ej>                  jB                  Z!ej>                  jD                  Z"ej>                  jF                  Z#e"jH                  jJ                  e"jH                  jL                  gZ'e jP                  jR                  e jT                  jJ                  e jV                  jJ                  gZ,	 defdZ- e       dd
fdZ. e       dfdZ/d Z0d Z1djdZ2 ee"jf                  jJ                   ed       ed       ed       ed       ed       ed       ed            Z4 e.e4 ed            Z5 ee jl                  jJ                  e4 ed            Z7 ee jl                  jJ                  e5 ed            Z8dkd Z9dkd!Z:dkd"Z;dkd#Z< ee"jz                  jJ                   ed$       ed%       ed&       e        e        ed'            Z>	 	 dld(Z?d) Z@djd*ZAd+ ZBd, ZCdjd-ZDd. ZEd/ ZFd0 ZGdjd1ZHd2 ZId3 ZJd4 ZKd5 ZLd6 ZM	 dmd7ZNd8 ZOd9 ZPd: ZQd; ZRd< ZSd= ZTd> ZUd? ZV ee"jz                  jJ                   e        e        e        e        e        e             ZWd@ ZXdA ZYdB ZZdC Z[dD Z\dE Z]dF Z^dG Z_dH Z`dI ZadJ Zbej                  fdKZdej                  fdLZedM Zfej                  fdNZgej                  fdOZhej                  fdPZidQ ZjdR ZkdS Zlej                  ddfdTZmej                  ddfdUZnej                  ddfdVZoej                  ddddfdWZp	 	 	 dndXZqdY ZrdZ Zsd[ Zt	 	 	 dnd\Zud] Zvd^ Zw G d_ d`      Zxda Zydb Zzdc Z{dd Z|de Z}df Z~ ej                  d      dg        Zdhej                  j                  fdiZy)o    N)Any)counters)has_free_symbols)map_arg   )	loweringsrequire_channels_last)ArgCallFunctionfilter_nodes
KeywordArgListOfMatch)pad_listlike   )register_freezing_graph_pattern)register_lowering_patternmatchc                 R   | j                         }t        |      dk(  sJ |d   }t        |t        j                  j
                        sJ |j                  d   j                  }|t        j                  t        j                  t        j                  t        j                  fv sJ |S )zp
    Get the pattern's output dtype from node's meta
    Assume only 1 output node in this matched pattern.
    r   r   val)output_nodeslen
isinstancetorchfxNodemetadtypeint8uint8float32bfloat16)r   pattern_output_nodesoutput_nodeoutput_dtypes       V/home/dcms/DCMS/lib/python3.12/site-packages/torch/_inductor/fx_passes/quantization.py_get_pattern_output_dtyper'   8   s    
 !--/#$)))&q)Kk588==111##E*00LEJJU]]ENNSSSS    Tc                 X    |r't        t        j                  j                  | ||      S | S )N_users)r   primsconvert_element_typedefault)patternr   with_dtype_convertuserss       r&   (_may_generate_pattern_with_dtype_convertr2   F   s3     &&..	
 	
 r(   c                 |    |r9t        t        j                  j                  j                  j
                  | |      S | S N)r   r   opsatenreshaper.   )r/   reshape_sizewith_reshapes      r&   "_may_generate_pattern_with_reshaper:   T   s6    IINN""**
 	
 r(   c           	          |t         j                  t         j                  fv sJ t        t        j
                  j                  t        | t        d      |t         j                  k(        t        d            }|S )Nautocast_wgt_dtypepermute_axes)	r   r!   r"   r   r6   permuter.   r2   r   )_dequant_per_channel_patternr   	t_patterns      r&   _generate_linear_t_patternrA   _   sh     U]]ENN33330(+,U^^#	

 	>"I r(   c                 B    t        |t        d      ||      } | |      S )Nto_float)r   r0   r1   )r2   r   )unary_fusioncall_fnr1   is_bf16computation_calls        r&   _unary_fusion_patternrH   p   s*    ?z*-'QV ())r(   Fc                     t        | rt        j                  j                  nt        j                  j                  t        d      t        d      t        d      t        d      t        d      t        d            }|S )Nxx_scalex_zpx_quant_minx_quant_max
x_dq_dtype)r   quantized_decomposeddequantize_per_tensortensorr.   r   )is_tensor_overload(dequantize_per_tensor_activation_patterns     r&   ,get_dequantize_per_tensor_activation_patternrU   x   sg    /; 	2299!77??396=!=!< 
0, 43r(   q_weightw_scalew_zpw_axisw_quant_minw_quant_maxw_dtyper<   memory_format)r]   c                    t        t        j                  j                  j                  j
                  t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d	      t        d
      t        d      t        d      t        d      t        d      t        d      t        d      t        d      |       S )NrJ   rK   rL   packed_weightrW   rX   bstridepaddingdilationgroupsoutput_scaleoutput_zero_pointr%   postop_namepostop_argspostop_algorithmr*   )r   r   r5   onednnqconv2d_pointwiser.   r   r1   s    r&   get_qconv2d_pt2e_patternrm      s    		**22396?#96389:8>"&'>"=!=!%&' r(   c                 ,   t        t        j                  j                  j                  j
                  t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d	      t        d
      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      |       S )NrJ   rK   rL   r_   rW   rX   accumr`   ra   rb   rc   rd   re   rf   r%   accum_scaleaccum_zero_pointbinary_op_namealphaunary_op_nameunary_op_argsunary_op_algorithmr*   )r   r   r5   rj   rk   binaryr   rl   s    r&   get_qconv2d_binary_pt2e_patternrx      s    		**11396?#967389:8>"&'>"=!%&#$7?#?#'(1 r(   c                    | r.t         j                  j                  j                  j                  n-t         j                  j                  j                  j
                  }t        |t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d	      t        d
      t        d      t        d      t        d      |      S )NrJ   rK   rL   r_   rW   rX   r`   re   rf   r%   rg   rh   ri   r*   )r   r5   rj   qlinear_pointwiserR   r.   r   r   x_scale_zp_are_tensorsr1   
qlinear_ops      r&   get_qlinear_pt2e_patternr~      s     " 			**11YY//77 
 396?#963>"&'>"=!=!%& r(   c                 @   | r.t         j                  j                  j                  j                  n-t         j                  j                  j                  j
                  }t        |t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d	      t        d
      t        d      t        d      t        d      t        d      t        d      t        d      t        d      t        d      |      S )NrJ   rK   rL   r_   rW   rX   x_2r`   re   rf   r%   x2_scalex2_zprr   rs   rt   ru   rv   r*   )r   r5   rj   rz   binary_tensorrw   r   r   r{   s      r&   get_qlinear_binary_pt2e_patternr      s     " 			**88YY//66 
 396?#9653>"&'>":7#$7?#?#'() r(   ro   rp   accum_zpaccum_dq_dtypec                 f    |rt        | ||      nt        | ||      }t        |t        d      |      S )Nconvert_dtype_after_inplace_add)r   r2   r   )binary_post_oprG   extra_input_patterndtype_convertswap_inputsbinary_patterns         r&   generate_pattern_with_binaryr     sP      		
 
  445 r(   c                 "    |t        ||       S | S r4   )r   )rG   unary_post_ops     r&   generate_pattern_with_unaryr   8  s#     
 	
 r(   c                     t        t        j                  j                  t	        | t               |      t        d      t        d      t        d      t        d      t        d            }|S )No_inv_scaleo_zpo_qmino_qmaxo_dtype)r   rP   quantize_per_tensorr.   r2   r
   r   )rG   r0    quantized_op_output_pattern_pt2es      r&   "generate_pattern_with_output_quantr   A  sa    '300880E	

 	=!6889($ ,+r(   c                     || j                   v r| j                   |   }||k(  S t        | j                        |dz   k\  sJ | j                  |   }||k(  S )Nr   )kwargsr   args)
check_node
kwarg_name
args_indexexpected_valueactual_values        r&   _check_node_kwarg_arg_valuer   R  s_    Z&&&!((4~--:??#
Q777!z2~--r(   c                      d } | S )Nc                     t        |       }|t        j                  t        j                  fv rIt	        | j
                  t        j                  j                  j                        d   }t        |dd|      S y)Nr   r%      T)
r'   r   r!   r"   r   nodesr5   rj   rk   r   )r   r%   qconv_node_after_weight_prepacks      r&   fnz;_is_valid_quantized_conv2d_optimization_pattern.<locals>.fn]  sj    07EMM5>>::.:UYY--??//+ //\  r(    r   s    r&   /_is_valid_quantized_conv2d_optimization_patternr   \      
 Ir(   c                 .    | r
t               S t               S r4   )+_is_valid_qconv_binary_optimization_patternr   has_binary_post_ops    r&   &_is_valid_qconv_post_op_fusion_patternr   l  s!      	45 =>r(   c                      d } | S )Nc                    t        | j                        dk7  ry| j                  d   j                  t        j                  j
                  j                  j                  t        j                  j
                  j                  j                  t        j                  j
                  j                  j                  t        j                  j
                  j                  j                  fv S Nr   Fr   )r   r   targetr   r5   rj   rk   r.   rR   rw   r   r   s    r&   r   z,_is_valid_qconv_lowering_pattern.<locals>.fnu      u{{q {{1~$$II..66II..55II..55II..<<	)
 
 	
r(   r   r   s    r&    _is_valid_qconv_lowering_patternr   t      
 Ir(   c                 R    t        | t               |      dt        ffd       }|S )Nextra_checkpass_numberr   c                     |d   |d   |d   }}}|d   |d   |d   }}}|d   |d   |d	   |d
   |d   f\  }	}
}}}t        |       }|t        j                  t        j                  t        j                  t        j
                  fv sJ |d   }|d   }|d   }|d   }|d   }|d   }|||||||	|
|||||||||f}t        d   dxx   dz  cc<   t        d   dxx   t        | j                        z  cc<   t           | S )NrJ   rK   rL   r_   rW   rX   r`   ra   rb   rc   rd   re   rf   r%   rg   rh   ri   inductorqconv2d_unary_lower_countr   qconv2d_unary_lower_nodes)
r'   r   r   r    r!   r"   r   r   r   L)r   r   r   rJ   rK   rL   r_   rW   rX   r`   ra   rb   rc   rd   r%   r   o_zero_pointrg   rh   ri   computation_argscomputation_ops                        r&   qconvz0_register_quantized_conv_lowering.<locals>.qconv  sx    3K96N 7 ?#96N !%w 3K89:80
,67Hf 17

EKKWWWW^,12n-]+]+!"45 #
& 	89Q>989S=MM9 "233r(   r   r   r   )r/   r   r   r   s     ` r&   !_register_quantized_conv_loweringr     s7    
 46
54U 54
54n Lr(   c                      d } | S )Nc                     t        |       }|t        j                  t        j                  fv rIt	        | j
                  t        j                  j                  j                        d   }t        |dd|      S y)Nr   r%   	   T)
r'   r   r!   r"   r   r   r5   rj   rz   r   )r   r%   !qlinear_node_after_weight_prepacks      r&   r   z;_is_valid_quantized_linear_optimization_pattern.<locals>.fn  sj    07EMM5>>::0<UYY--??11- /1>1l  r(   r   r   s    r&   /_is_valid_quantized_linear_optimization_patternr     r   r(   c                 .    | r
t               S t               S r4   )-_is_valid_qlinear_binary_optimization_patternr   r   s    r&   (_is_valid_qlinear_post_op_fusion_patternr     s!      	67 =>r(   c                      d } | S )Nc                    t        | j                        dk7  ry| j                  d   j                  t        j                  j
                  j                  j                  t        j                  j
                  j                  j                  t        j                  j
                  j                  j                  t        j                  j
                  j                  j                  fv S r   )r   r   r   r   r5   rj   rz   r.   rR   rw   r   r   s    r&   r   z._is_valid_qlinear_lowering_pattern.<locals>.fn  r   r(   r   r   s    r&   "_is_valid_qlinear_lowering_patternr     r   r(   c                 R    t        | t               |      dt        ffd       }|S )Nr   r   c                 H   t        |       }|d   |d   |d   }}}|d   |d   |d   }	}}d|v r|d   nd }
|d   }|d	   }|d
   }|d   }|d   }||||||	|
||||||f}t        d   dxx   dz  cc<   t        d   dxx   t        | j                        z  cc<   t	           | S )NrJ   rK   rL   r_   rW   rX   r`   re   rf   rg   rh   ri   r   qlinear_unary_lower_countr   qlinear_unary_lower_nodes)r'   r   r   r   r   )r   r   r   r%   rJ   rK   rL   r_   rW   rX   r`   r   r   rg   rh   ri   r   r   s                    r&   qlinearz:_register_quantized_linear_unary_lowering.<locals>.qlinear  s    17 3K96N 7 ?#96N !%w &=F3Kd ^,12 ]+]+!"45 
 	89Q>989S=MM9 "233r(   r   r   r   )r/   r   r   r   s     ` r&   )_register_quantized_linear_unary_loweringr     s7    
 68
,4u ,4
,4\ Nr(   c                 R    t        | t               |      dt        ffd       }|S )Nr   r   c                    t        |       }|J |d   |d   |d   }}}|d   }|d   }|d   }	|d   |d   |d	   }}}
d
|v r|d
   nd }|d   }|d   }|j                          ddlm} |d   }|d   }|d   }|d   }|d   }|dk(  r
 ||      sd}||||
|||||||||	|||||f}t        d   dxx   dz  cc<   t        d   dxx   t        | j                        z  cc<   t           | S )NrJ   rK   rL   r   r   r   r_   rW   rX   r`   re   rf   r   _can_be_inplacerr   rs   rt   ru   rv   sumaddr   qlinear_binary_lower_countqlinear_binary_lower_nodesr'   realizemkldnn_fusionr   r   r   r   r   )r   r   r   r%   rJ   rK   rL   x2r   r   r_   rW   rX   r`   r   r   r   rr   rs   rt   ru   rv   r   r   s                          r&   qlinear_binaryzB_register_quantized_linear_binary_lowering.<locals>.qlinear_binary,  s    17''' 3K96N 7
 E]*%w ?#96N !%w &=F3Kd^,12


2 01w//#$89U"?2+> #N %
( 	9:a?:9:c%++>NN: "233r(   r   )r/   r   r   r   s     ` r&   *_register_quantized_linear_binary_loweringr   '  s;    
 68
A4e A4
A4F r(   c                  \    t        t        j                  j                  j                        S r4   )2_is_valid_quantized_op_binary_optimization_patternr   r5   rj   rk   r   r(   r&   r   r   w  s"    =		** r(   c                  `    t        t        j                  j                  j                  d      S )NF)extra_input_from_dequant)r   r   r5   rj   rz   r   r(   r&   r   r   }  s%    =		**!& r(   c                       fd}|S )Nc                    t        |       }t        | j                  	      d   }t        |j                        dk7  ryt        t        |j                              j                  }t        |      dk(  sJ d       |t        j                  t        j                  fv rdd }|D ]  }||k7  s	|} n |J rLt        |t        j                  j                        r'|j                  t        j                   j"                  k7  ryt%        |d   d      r}t        |d   j&                  j)                  dd       t        j*                        rFt%        |d   d      r7t        |d   j&                  j)                  dd       t        j*                        sy|d   j&                  d   j-                         |d   j&                  d   j-                         k7  ryddlm} d	| j2                  v r| j2                  d	   nA|t        j4                  t        j6                  fv ss| j2                  d
   n| j2                  d   }t         |||            dkD  s||j                  d   k(  ryy)Nr   r   Fr   z!Expects binary node with 2 inputsr   r   )_get_remaining_usersotherro   accum_after_dequantT)r'   r   r   r   r1   nextiterr   r   r!   r"   r   r   r   r   rP   rQ   r.   hasattrr   getTensorsizer   r   r   r    r   )
r   r%   compute_nodebinary_node_inputsextra_input_of_binary_nodeargr   extra_input_of_patternr   qops
           r&   r   z>_is_valid_quantized_op_binary_optimization_pattern.<locals>.fn  s5   07#EKK5a8|!!"a'!$|'9'9":;@@%&!+P-PP+EMM5>>::)-&) ,&14. .999' :EHHMMJ.55+AAIIJ  &q)62-a05599%FU&q)62-a05599%FU q!&&u-224!!$))%05578  	8 %,,& LL! !U[[%**$==0 W% \\"78 	 $%;\JKaO%):):1)==r(   r   )r   r   r   s   `` r&   r   r     s    >@ Ir(   c                 R    t        | t               |      dt        ffd       }|S )Nr   r   c                     t        |       }|J |d   |d   |d   }}}|d   }|d   }|d   }	|d   |d   |d	   }}}
|d
   |d   |d   |d   |d   f\  }}}}}|d   }|d   }|d   }|d   }|d   }|d   }|d   }|j                          ddlm}  ||      sJ d       ||||
|||||||||||||	|||||f}t        d   dxx   dz  cc<   t        d   dxx   t        | j                        z  cc<   t           | S )NrJ   rK   rL   ro   rp   rq   r_   rW   rX   r`   ra   rb   rc   rd   re   rf   rr   rs   rt   ru   rv   r   r   zGQConv Binary Inplace Fusion requires accum is not an alias or mutation.r   qconv2d_binary_lower_countqconv2d_binary_lower_nodesr   )r   r   r   r%   rJ   rK   rL   ro   rp   r   r_   rW   rX   r`   ra   rb   rc   rd   re   rf   rr   rs   rt   ru   rv   r   r   r   s                              r&   qconv_binaryz>_register_quantized_conv_binary_lowering.<locals>.qconv_binary  s    17'''!#;y(96&>D7w]+,-?#96N !%w 3K89:80
,67Hf n-"#67   01w//#$892u% 	
U	
%
 -
0 	9:a?:9:c%++>NN: "233r(   r   )r/   r   r   r  s     ` r&   (_register_quantized_conv_binary_loweringr    s8    
 46
?4E ?4
?4B r(   c                     dD ]F  } t        |       }t        |dt        j                  j                  j
                  j                         H dD ]x  }t        |      }|r.t        j                  j                  j                  j                  n-t        j                  j                  j                  j                  }t        |d|       z y N)r   r   r   FT)rm   r   r   r5   rj   rk   r.   r~   rz   rR   r   r1   qconv_patternr|   qlinear_patternr   s        r&   %_register_quantization_unary_loweringr  !  s     
07)II..66	

 #0 
23IJ & II..55!!33;; 	
 	2	

r(   c                     dD ]F  } t        |       }t        |dt        j                  j                  j
                  j                         H dD ]x  }t        |      }|r.t        j                  j                  j                  j                  n-t        j                  j                  j                  j                  }t        |d|       z y r  )rx   r  r   r5   rj   rk   rw   r   rz   r   r   r  s        r&   &_register_quantization_binary_loweringr  :  s     
7>0II..55	

 #0 
9:PQ & II..<<!!33:: 	
 	3	

r(   c                      d } | S )Nc                 t    t        | j                  t        j                        d   }|j                  d   dk(  S Nr   r   )r   r   operatorgetitemr   )r   get_item_nodes     r&   r   z>_is_valid_quantized_maxpool2d_optimization_pattern.<locals>.fnT  s5     %U[[(2B2BCAF!!!$))r(   r   r   s    r&   2_is_valid_quantized_maxpool2d_optimization_patternr  S  s    * Ir(   c                 P    t        | t                     dt        ffd       }|S )Nr   r   c                 6   |d   }|d   }d|v r|d   nd }d|v r|d   nd}d|v r|d   nd}d|v r|d   nd	}|dk(  rddg}|dk(  rddg}|s|}t        |d
      }t        |d
      }t        |d
      }t        |d
      }t        |      d
k(  sJ t        |      d
k(  sJ t        |      d
k(  sJ t        |      d
k(  sJ ||||||f}	t        g|	 \  }	}
t        d   dxx   dz  cc<   t        d   dxx   t        | j                        z  cc<   t           |	 S )NrJ   kernel_sizera   rb   r   rc   r   	ceil_modeFr   r   qmaxpool2d_matcher_countqmaxpool2d_matcher_nodes)r   r   r	   r   r   r   )r   r   r   rJ   r  ra   rb   rc   r  r   _r   s              r&   
qmaxpool2dz:_register_quantized_maxpool2d_lowering.<locals>.qmaxpool2da  s   
 3K]+&.&&8!t(1V(;&#!*4*>6*%Q,76,AF;'	a<!fGq=1vH F";2fa(w*!,;1$$$6{a7|q   8}!!! 
 4NVEUV!78A=878C<LL8 "233r(   )r   r  r   )r/   r   r  s    ` r&   &_register_quantized_maxpool2d_loweringr  ]  s5     FH#4% #4	#4J r(   c            	         t        d      gt        d      t        d      gt        d      t        d      t        d      gt        d      t        d      t        d      t        d      gg} | D ]  }t        t        j                  j                  t               t        d      g| }t        t        j                  j                  t               t        d      g|t        d       }t        t        j                  |t                     }t        t        j                  |t                     }t        t        |      t        j                  j                         t        t        |      t        j                  j                          y )Nra   rb   rc   r  r  offset_dtype)r   r   r6   max_pool2d_with_indicesr.   rU   r,   #_low_memory_max_pool2d_with_offsetsr  r  r
   r  r   	quantized
max_pool2d)max_pool2d_args_listmax_pool2d_argsdequantize_maxpool2d_pattern#dequantize_lowmem_maxpool2d_pattern%dequantize_maxpool2d_get_item_pattern,dequantize_lowmem_maxpool2d_get_item_patterns         r&    _register_quantization_maxpool2dr+    s    x 	
 x y!	

 x y!z"	
 x y!z"{#		
( 0 !
'3((008:}%(
 	(
$ /;55==8:}%/
 	/

 ~&/
+ 1=(E1
-
 8D/E8
4
 	/./TU  ((	
 	/.<   ((		
9!
r(   c                     d }|S )Nc                 J   t        | j                  t        j                  j                        }|D cg c]  }|j
                  d    c}t        | j                  t        j                  j                        }t        |      dk(  sJ d       j                  |d   j
                  d          t        fdD              sy|D cg c]  }|j
                  d    c}j                  |d   j
                  d          t        fdD              syyc c}w c c}w )	Nr   r   z.expect only 1 add node at output quant patternr   c              3   .   K   | ]  }|d    k(    yw)r   Nr   ).0
zero_pointzero_pointss     r&   	<genexpr>z=_is_input_output_same_scale_zp.<locals>.fn.<locals>.<genexpr>  s     NJ:Q/Ns   Fc              3   T   K   | ]  }t        j                  |d    d       ! yw)r   gh㈵>)rel_tolN)mathisclose)r/  scalescaless     r&   r2  z=_is_input_output_same_scale_zp.<locals>.fn.<locals>.<genexpr>  s%     TE4<<vay$??Ts   %(T)
r   r   rP   rQ   r.   r   r   r   appendall)r   dequant_nodesnodequant_nodesr8  r1  s       @@r&   r   z*_is_input_output_same_scale_zp.<locals>.fn  s     %KK-CCKK
 1>>tyy|>"KK-AAII
 ;1$V&VV$;q>..q12N+NN ,994$))A,9k!n))!,-TVTT! ? :s   DD r   )r   r   s     r&   _is_input_output_same_scale_zpr>    s    2 Ir(   c                     t        | t        t        j                  j                              dt
        ffd       }|S )Nr  r   c                     |D cg c]  }|d   	 }}t         d   dxx   dz  cc<   t         d   dxx   t        | j                        z  cc<   t           ||      S c c}w )Nr   r   qcat_matcher_countr   qcat_matcher_nodesr   r   r   r   )r   inputsdimr   inputuint8_inputsr   s         r&   qcatz._register_quantized_cat_lowering.<locals>.qcat  sh     /55Ua5512a7212c%++6FF2 s33 6s   A)r   r>  r6   catr.   r   )r/   r   rH  s    ` r&    _register_quantized_cat_loweringrJ    s?     24883C3CD4E 4	4 Kr(   c                      t        t        j                  j                  t	        t
              t        d            } t        t        |       t        j                         y )NrE  )	r   r6   rI  r.   r   -_raw_dequantize_per_tensor_activation_patternr   rJ  r   )dequantize_cat_patterns    r&   _register_quantization_catrN    sE    )<=5
 %*+ABr(   c                     t        | t        t        j                  j                              dt
        ffd       }|S )Nr  r   c                     |d   }|d   }t         d   dxx   dz  cc<   t         d   dxx   t        | j                        z  cc<   t           ||      S )NrJ   shaper   qreshape_matcher_countr   qreshape_matcher_nodesrC  )r   r   r   qxrQ  r   s        r&   qreshapez6_register_quantized_reshape_lowering.<locals>.qreshape  s^    
 C[w56!;656#ekk:JJ6 U++r(   )r   r>  r6   r7   r.   r   )r/   r   rU  s    ` r&   $_register_quantized_reshape_loweringrV    s?     24<<3G3GH, ,	, Or(   c                      t        t        j                  j                  j                  j
                  t               t        d            } t        t        |       t        j                         y )NrQ  )
r   r   r5   r6   r7   r.   rU   r   rV  r   )dequantize_reshape_patterns    r&   _register_quantization_reshaperY  #  sL    !-		&&467"
 )*+EFr(   c                      d } | S )Nc                 Z    t         fddD              sJ t         fddD              sy j                  d   j                  d   } j                  d   j                  d   } j                  d   j                  d   }|j                  t        j
                  k(  xr |j                  t        j                  k(  xrn |j                  t        j
                  k(  xrO |j                  j                  d	k(  xr4 |j                  |j                  k(  xr |j                  |j                  k(  S )
Nc              3   :   K   | ]  }|j                   v   y wr4   )r   )r/  kr   s     r&   r2  zA_is_valid_woq_optimization_pattern.<locals>.fn.<locals>.<genexpr>1  s     H1$Hs   )rJ   weightr8  c              3   P   K   | ]  }t        j                  |   d         yw)r   N)r   r   )r/  keyr   s     r&   r2  zA_is_valid_woq_optimization_pattern.<locals>.fn.<locals>.<genexpr>2  s&      
36GELL%v.
s   #&FrJ   r   r^  r8  cpu)	r:  r   r   r   r   r"   r   devicetype)r   rJ   r^  r8  s   `   r&   r   z._is_valid_woq_optimization_pattern.<locals>.fn0  s   H.GHHHH 
:S
 
 LL""5)h',,U3h',,U3 GGu~~% *

**.*
 &* FMM)* FMM)	
r(   r   r   s    r&   "_is_valid_woq_optimization_patternrd  /  s    
, Ir(   c                 T    t        | t                     dt        ffd       }|S )Nr  r   c                 Z   |d   }|d   }|d   }t         d   dxx   dz  cc<   t         d   dxx   t        | j                        z  cc<   |j                         d   }|j                         }d	|d	   g}|d d	 |gz   }	t	           ||      }
t	           |
||      }t	           ||	      S )
NrJ   r^  r8  r   woq_matcher_countr   woq_matcher_nodesr   )r   r   r   get_sizer   )r   r   r   rJ   r^  r8  out_featuresorigin_x_sizex_shape	out_shapefunc1func2computation_reshapecomputation_woqs               r&   woqz#_register_woq_lowering.<locals>.woqJ  s    
 3K!!01Q6101S5EE1(+

}R()!#2&*
 
	 %&q'2/"5&&9$%eY77r(   )r   rd  r   )r/   rr  rq  rs  s    `` r&   _register_woq_loweringrt  I  s1    6885 8	8  Jr(   c                     t        t        j                  j                  t        t        j                  j
                  t        t        j                  j
                  t        t        j                  j
                  t        d      t                     t        t        j                  j
                  t        t        j                  j
                  t        d      t                     t                           t                     t        d            } t        | t        j                  j
                  t        j                         y NrJ   r^  r8  )r   r6   mulr   r7   r.   mmr   r
   r>   r,   r-   rt  _weight_int8pack_mm_woq_patterns    r&   _register_woq_mm_int8_pattern1r|  a  s      LL  T\\11:c?CEJLL(( 22::Jx<PRURW E
 E	
 	8#L& <)A)A)I)I4<<Xr(   c                  2   t        t        j                  j                  t        t        j                  j
                  t        t        j                  j
                  t        d      t        t        j                  j
                  t        t        j                  j
                  t        d      t                     t                           t                     t        d            } t        | t        j                  j
                  t        j                         y rv  )r   r6   rw  r   r7   r.   rx  r   r>   r,   r-   r
   rt  ry  rz  s    r&   _register_woq_mm_int8_pattern2r~  z  s      LL  3LL(( 22::Jx<PRURW E
 E	
 	8#L& <)A)A)I)I4<<Xr(   c                     t        t        j                  j                  t        t        j                  j
                  t        t        j                  j
                  t        d      t                     t        t        j                  j
                  t        t        j                  j
                  t        t        j                  j
                  t        d      t                     t                     t                           t        d            } t        | t        j                  j
                  t        j                         y rv  )r   r6   rw  r   bmmr.   expandr   r
   r>   r,   r-   rt  ry  r7   rz  s    r&   _register_woq_mm_int8_pattern3r    s      HH,,josuE##LL(( 22::Jx<PRURW E 
	
 	8#L& <)A)A)I)I4<<Xr(   c                     t        t        j                  j                  t        t        j                  j
                  t        d      t        t        j                  j
                  t        t        j                  j
                  t        d      t                     t                           t        d            } t        | t        j                  j
                  t        j                         y rv  )r   r6   rw  r   rx  r.   r   r,   r-   r>   r
   rt  ry  r7   rz  s    r&   _register_woq_mm_int8_pattern4r    s    GGOOsO**22LL((x(E
 	
 	8L" <)A)A)I)I4<<Xr(   c                  h    t                t                t                t                t	                y r4   )r  r  r+  rN  rY  r   r(   r&    _register_quantization_loweringsr    s     )+*,$& "$r(   c                  T    t                t                t                t                y r4   )r|  r~  r  r  r   r(   r&   _register_woq_loweringsr    s    "$"$"$"$r(   c                       fd}|S )Nc                 *   t         j                  t         j                  fv sJ | j                         }|j                  t
        j                  j                  t
        j                  j                  t        j                  j                  t        j                  j                  fvry|j                  t        j                  j                  u r?t         j                  k(  r|j                  d   n|j                  d   j                  d   }n$t         j                  k(  r|n|j                  d   }|j                  t
        j                  j                  t
        j                  j                  fv r"t        t        |j                               dkD  ryy)NFr   r   T)r   r!   r"   r$   r   rP   rQ   r.   rR   r,   r-   r6   r7   r   r   listr1   )r   dequant_pattern_end_nodedequant_noder   s      r&   _innerz3_is_valid_dequant_promotion_pattern.<locals>._inner  sZ   7777#(#4#4#6 #** 66>> 66==&&..LL  	3
 
 #**dll.B.BB
 EMM) )-- .2215::  EMM) )-22  $::BB$::AA
 D17789A= r(   r   r   r  s   ` r&   #_is_valid_dequant_promotion_patternr    s    (T Mr(   c                 R    t        | t              |      dt        ffd       }y )Nr   r   c                 N   t         j                  t         j                  fv sJ d }| j                         }|j                  t
        j                  j                  t
        j                  j                  t        j                  j                  t        j                  j                  fv sJ fd |      }|j                  t
        j                  j                  t
        j                  j                  fv sJ | j                  }t        |j                        }|dd  D ]E  }|}	|}
|	|j                   d   k7  s |||	|
      }
|	j                   d   }	|	|j                   d   k7  r,G t"        d   dxx   dz  cc<   t"        d   dxx   t%        | j&                        z  cc<   y )Nc                 P   |j                   dk(  sJ d       | j                  |      5  | j                  |j                  |j                  |j
                        }t        j                  |j                        |_        |j                  ||       d d d        |S # 1 sw Y   S xY w)Ncall_functionz4clone_to_new_node only support node.op call_function)r   r   )	opinserting_beforer  r   r   r   copyr   replace_input_with)graphsource_node	user_nodenew_nodes       r&   clone_to_new_nodezV_register_dequant_promotion_pass.<locals>.dequant_promotion.<locals>.clone_to_new_node  s     >>_4 F4 ''	2 D ..&&$))&-- / 
 !%		+*:*: ;,,[(CD OD Os   A)BB%c                     | j                   t        j                  j                  t        j                  j                  fv r| S t        | j                        dk\  sJ d        | j                  d         S )Nr   z=In in dequant pattern, each node should have more than 1 arg.r   )r   rP   rQ   r.   rR   r   r   )_node#_find_first_node_in_dequant_patterns    r&   r  zh_register_dequant_promotion_pass.<locals>.dequant_promotion.<locals>._find_first_node_in_dequant_pattern>  sn    ||$::BB$::AA  
 5::!+ S+ ;5::a=IIr(   r   r   r   dequant_promotion_matcher_countdequant_promotion_matcher_nodes)r   r!   r"   r$   r   rP   rQ   r.   rR   r,   r-   r6   r7   r  r  r1   r   r   r   r   )r   r   r   r  r  dequant_pattern_start_noder  user_node_listr  _source_node
_user_noder  r   s              @r&   dequant_promotionz;_register_dequant_promotion_pass.<locals>.dequant_promotion  s   < 7777	& $)#4#4#6 '.. 66>> 66==&&..LL  	3
 
 	
 
	J &I$&
" *00 66>> 66==5
 
 	
 
 6<<='+ 	4I3L"J"<"A"A!"DD.ulJO
+003 "<"A"A!"DD	4 	>?1D?>?3u{{CSS?r(   )r   r  r   )r/   r   r   r  s     ` r&    _register_dequant_promotion_passr    s5    $7>
[T [T
[Tr(   c                       fd}|S )Nc                    | j                         }|j                  t        j                  j                  u sJ |j
                  d   j                  j                  d      }|j
                  d   j                  j                  d      }||fD ]K  }|F|j                  j                  dk7  r|j                  j                  dk7  s|j                         dk7  sK y t        j                  t        j                  fv sJ t        j                  k(  r|j
                  d   }n|j
                  d   }|j
                  d   }t        t        |j                               dk7  ryy)	Nr   r   r   ra  xpu   FT)r$   r   r6   convolutionr.   r   r   r   rb  rc  rE  r   r!   r"   r   r  r1   )r   	conv_nodeinput_meta_valueweight_meta_value
meta_valuer  convert_to_bf16r   s          r&   r  z0_is_valid_dequant_conv2d_pattern.<locals>._innerc  s?    %%'	4#3#3#;#;;;;$>>!,1155e<%NN1-2266u=+->? 	J"%%**e3
8I8I8N8NRW8W>>#q( 	 7777EMM!$>>!,L'nnQ/O*//2LtL&&'(A- r(   r   r  s   ` r&    _is_valid_dequant_conv2d_patternr  b  s    B Mr(   c                 R    t        | t              |      dt        ffd       }y )Nr   r   c                 D   t         j                  t         j                  fv sJ | j                         }|j                  t
        j                  j                  u sJ t         j                  k(  r|j                  d   }n|j                  d   }|j                  d   }|j                  d   j                  t
        j                  j                  u }|r|j                  d   nd}t         j                  k(  r!|r|j                  d   n|j                  d   }n/|r|j                  d   n|j                  d   }	|	j                  d   }|j                  t        j                  j                  u sJ |d   |d   |d   }}}
|d   |d   |d	   }}}|d
   |d   |d   |d   |d   f\  }}}}}|
j                  j                  d      j                  }t        |      rd}| j                   }|j#                  |      5  |||||||||f	}t         j$                  j&                  j(                  }|j+                  ||      }|
||||||||||dddg df}|j+                  t         j$                  j&                  j,                  j                  |      }|j/                  |       |j                  j1                  |j                         |j3                  |       t         j                  k(  r|j3                         |j3                  |       ||j3                  |       t         j                  k(  r|j3                  	       |j3                  |       t4        d   dxx   dz  cc<   t4        d   dxx   t7        | j8                        z  cc<   ddd       y# 1 sw Y   yxY w)ao  
        Match the pattern:
        int8 activation
          |
        dequant_per_tensor
          |
        Conv2d <- optional(aten.clone.default) <- dequant_per_channel <- int8_weight

        Insert weight prepack node and change the pattern to:
        int8 activation
          |
        onednn.qconv2d_pointwise <- onednn.qconv_prepack <- int8_weight
        r   r   NrJ   rL   rK   rV   rW   rX   r`   ra   rb   rc   rd   tensor_metar         ?none r   $qconv2d_weight_prepack_matcher_count$qconv2d_weight_prepack_matcher_nodes)r   r!   r"   r$   r   r6   r  r.   r   clonerP   dequantize_per_channelr   r   rQ  r   r  r  r5   rj   qconv_prepackr  rk   replace_all_uses_withupdate
erase_noder   r   r   )r   r   r   r  r  r  )has_clone_to_channel_last_node_in_pattern
clone_nodedequant_per_channelweight_to_bf16_noderT  rL   rK   qwrW   rX   biasra   rb   rc   rd   rm  r  packed_weight_inputspacked_weight_opprepack_weight_nodenew_argsnew_conv_noder   s                               r&   qconv_weight_prepackzA_register_qconv_weight_prepack_pass.<locals>.qconv_weight_prepack  s   & 7777%%'	4#3#3#;#;;;;EMM!$>>!,L'nnQ/O*//2LNN1$$

(:(:: 	2 "KINN1PT 	 EMM! = "^^A&   = "^^A&  
 #6":":1"=  &&#::BBC	
C 3K6N9 D :96N G 3K89:83
/fgx ''++m,22G$G##I. :	 
$   %yy//=="'"5"5 '; #6 #
 ##)H& "//		  22:: 0 M ++M:%%inn5 Y'&  1\*%  ,&  !4501Z !GHAMHZ !GHCM Hq:	 :	 :	s   9FNN)r   r  r   )r/   r   r   r  s     ` r&   #_register_qconv_weight_prepack_passr    s2    $4U;
FE F
Fr(   c                 ~   |t         j                  t         j                  fv sJ t        t        j
                  j                  t        t               t        d      |t         j                  k(        | t        d      t        d      t        d      t        d      t        d      t        d      t        d      
      }|S )	Nautocast_act_dtyper`   ra   rb   rc   is_transposedout_paddingrd   )
r   r!   r"   r   r6   r  r.   r2   rU   r   )r?   r    dequant_convolution_node_patterns      r&   *_generate_dequant_convolution_node_patternr    s     U]]ENN3333'3  08:+,U^^#	

 	%389:?#=!8($  ,+r(   c                     | t         j                  t         j                  fv sJ t        | t         j                  k(  rt        nt
        |       t        | t         j                  k(  rt        |       fS t        |       fS r4   )r   r!   r"   r  %dequantize_per_channel_weight_pattern-dequantize_per_channel_to_bf16_weight_pattern+dequantize_per_channel_clone_weight_pattern3dequantize_per_channel_to_bf16_clone_weight_pattern)r   s    r&   '_generate_qconv_weight_prepack_patternsr  -  s~    U]]ENN33332% 2>		
 	3% 8 		
  E		
 r(   c                    d }|r|rH| j                         }|j                  t        j                  j                  u sJ |j
                  d   }nTt        | j                  t        j                  j                        }t        |      dk(  sJ |d   }n| j                         }|j                  t        j                  j                  t        j                  j                  t        j                  j                  fv sJ ||fS r  )r$   r   r6   r7   r.   r   r   r   r  r   addmmrx  )r   input_dim_exceeds_twoinput_contiguousoutput_reshape_nodelinear_nodelinear_nodess         r&   _get_linear_noder  C  s    "'"3"3"5&--1E1EEEE-2215K'TXX5E5EFL|$)))&q/K'')

"   
 +++r(   c                    d }d }d }|r|ry| j                   |   }|j                  t        j                  j                  u sJ |t
        j                  k(  r|j                   d   }n|j                   d   }|j                   d   }n| j                   |   }|j                  t        j                  j                  u sJ |t
        j                  k(  r|j                   d   }n`|j                   d   }|j                   d   }nA|t
        j                  k(  r| j                   |   }n| j                   |   }|j                   d   }||||fS )Nr   )r   r   r6   r7   r.   r   r!   r  )	r  input_indexr   r  r  act_reshape_nodeactivation_to_bf16_nodeact_expand_noder  s	            r&   _get_linear_dq_noder  Y  sF    "O*//<#**dll.B.BBBB%/44Q7 +;*?*?*B'6;;A> *..{;O"))T[[-@-@@@@%.33A6*9*>*>q*A'6;;A>EMM!&++K8L '2&6&6{&C#277:L)+BOSSr(   c                       fd}|S )Nc                     t        |       \  }}|j                  t        j                  j                  u rdnd}
t
        j                  t
        j                  fv sJ t        ||
      \  }}}}|j                  t        j                  j                  t        j                  j                  fv sJ t        t        |j                              dk7  ryrs| j                  d   }| j                  d   }t!        |d      rgt#        |j$                  j'                  dd       t
        j(                        r3|j$                  d   j+                         t        j,                  |      k(  sy| j                  d   }|ddgk7  ry| j                  d	   }| j                  d
   }	t!        |d      rt#        |j$                  j'                  dd       t
        j(                        rt        |j$                  d   j+                               dk(  rt        |	      dk(  rr|	d   |j$                  d   j+                         d   k(  rL|	d   |j$                  d   j+                         d   k(  r&|	d   |j$                  d   j+                         d   k(  syy)Nr   r   Fact_expand_sizerJ   r   r   r=   rV   wgt_expand_sizer      T)r  r   r6   r  r.   r   r!   r"   r  rP   rQ   rR   r   r  r1   r   r   r   r   r   r   r   Size)r   r  r  r  r  r  act_nodewgt_permute_dimsqweight_noder  r   r  r  s             r&   r  z0_is_valid_dequant_linear_pattern.<locals>._inner  sa   
 U$9;KL	
 '--1C1CCa7777  e-BDT
	

 "" 66>> 66=='
 
 	
 

 tL&&'(A-  !)9 $ll+<=O||C(H&)x}}00=u||L]]5)..0EJJ4OO  %||N;Aq6) !<<
3L#ll+<=Of-|0044UDA5<<P))%05578A=(A-#A&(--*>*C*C*Ea*HH#A&,*;*;E*B*G*G*I!*LL#A&,*;*;E*B*G*G*I!*LLr(   r   )r   r  r  r  s   ``` r&    _is_valid_dequant_linear_patternr  ~  s    BH Mr(   c                 ^    t        | t              |      dt        ffd       }y )Nr   r   c                 $
   t         j                  t         j                  fv sJ t        | !       \  }}|j                  t
        j                  j                  u rdnd}|dz   }t        ||!       \  }}}	}
!rI sG|j                  |   }|j                  t
        j                  j                  u sJ |j                  d   }n|j                  |   }t         j                  k(  r|j                  d   }n|j                  d   }|j                  d   }|j                  t        j                  j                  u sJ |d   |d   |d   }}}|d   |d   |d   }}}d	|v r|d	   nd
}|j                  j                  d      j                  }t!        |      rd
}| j"                  }|j%                  |      5  ||f}t         j&                  j(                  j*                  }|j-                  ||      }|||||||dddg df}t         j.                  j0                  j2                  }t5        ||      rLt5        ||      r@|j-                  t         j&                  j(                  j6                  j8                  |      }n?|j-                  t         j&                  j(                  j6                  j                  |      }!r r7|j;                  |       |j                  j=                  |j                         n|ro| j?                         }|j                  t
        j@                  jB                  u sJ |j;                  |       |j                  j=                  |j                         nm|j;                  |       |j                  j=                  |j                         n6|j;                  |       |j                  j=                  |j                         !r) r|jE                  |       n s|r|jE                         |jE                  |       !r6 r|jE                  |       n"|jE                  |
       |jE                         t         j                  k(  r|jE                  |	       |jE                  |       |jE                  |       t         j                  k(  r|jE                         |jE                  |       tF        d   dxx   dz  cc<   tF        d   dxx   tI        | jJ                        z  cc<   d
d
d
       y
# 1 sw Y   y
xY w)aX  
        Match the pattern:
        int8 activation
          |
        dequant_per_tensor
          |
        mm/addmm <- t <- dequant_per_channel <- int8_weight

        Insert weight prepack node and change the pattern to:
        int8 activation
          |
        onednn.qlinear_pointwise <- onednn.qlinear_prepack <- int8_weight
        r   r   rJ   rL   rK   rV   rW   rX   r`   Nr  r  r  r  r  r   $qlinear_weight_prepack_matcher_count$qlinear_weight_prepack_matcher_nodes)&r   r!   r"   r  r   r6   r  r.   r  r   r  rP   r  r   r   rQ  r   r  r  r5   rj   qlinear_prepackr  r   r<  r   r   rz   rR   r  r  r$   r   r   r  r   r   r   )"r   r   r   r  r  r  weight_indexr  r  r  r  wgt_expand_nodet_noder  r  rT  rL   rK   r  rW   rX   r  rm  r  r  r  r  r  r   new_linear_nodeoutput_add_node_for_biasr   r  r  s"                                  r&   qlinear_weight_prepackzE_register_qlinear_weight_prepack_pass.<locals>.qlinear_weight_prepack  s   * 7777 U$9;KL	
&--1C1CCa"Q  e-BDT
	
#
 !)9)..|<O"))T[[-@-@@@@$))!,F %%l3FEMM!"(++a."(++a."5":":1"=&&#::BBC	
C 3K6N9 D :96N G "Vmvc{''++m,22G$G##K0 N	 $   %yy//??"'"5"5 '; #6 #
 #)H 88==%%D'4(Zd-C"'"5"5II$$66==H #6 # #("5"5II$$66>>X #6 # %#'==oN#((//0C0H0HI383D3D3F07>>$((//QQQ0FFW',,334L4Q4QR#99/J',,33K4D4DE11/B$$++K,<,<= %#$$%89)d$$%=>[)$#$$%56$$_5$$_5&  !89\*V$&  !4501Z !GHAMHZ !GHCM HYN	 N	 N	s   4M	TT)r   r  r   )r/   r   r   r  r  r  s     ``` r&   %_register_qlinear_weight_prepack_passr    s?     %4(*:
  Te TTr(   c                 j   |t         j                  t         j                  fv sJ t        | |      }t	        t        t        j                  j                  t        d      t	        t        t        |      t        d      |t         j                  k(        t        d      |      |      t        d      |      }t	        t        t        j                  j                  t	        t        t        |      t        d      |t         j                  k(        t        d      |      |      t        d      |      }||fS )Nr`   r  act_reshape_sizeoutput_reshape_size)r   r!   r"   rA   r:   r   r6   r  r.   r   r2   rU   rx  )r?   r   r  rS   r@   dequant_linear_bias_patterndequant_linear_no_bias_patterns          r&   %_generate_dequant_linear_node_patternr  k  s     U]]ENN3333*+GOI"DJJsO.8@AST34U^^+
 -.% 	
 	()!#$ &HGGOO.8@AST34U^^+
 -.% 	
 	()&"" '(FFFr(   c                    t        | |      }|t        j                  t        j                  fv sJ t	        t
        j                  j                  t	        t
        j                  j                  t        t        |      t        d      |t        j                  k(        t        d            t	        t
        j                  j                  |t        d                  }d } |||      S )Nr  r  r  c                 f    |r.t        t        j                  j                  | t	        d            S | S )Nr`   )r   r6   r   r   r   )_dequant_bmm_pattern
_with_biass     r&   !_generate_pattern_with_output_addzM_generate_dequant_bmm_node_pattern.<locals>._generate_pattern_with_output_add  s/    $3  ('r(   )rA   r   r!   r"   r   r6   r  r.   r  r2   rU   r   )r?   r   	with_biasrS   r@   dequant_bmm_patternr
  s          r&   "_generate_dequant_bmm_node_patternr    s     ++GOIU]]ENN3333&KK4<=OP/0'
 ()	
 	KK()	
$( --@)LLr(   c                 R    |r|st        t        | ||      S t        t        | ||      S r4   )r  r  r  r   r  r  r  rS   s        r&   )_generate_qlinear_weight_prepack_patternsr    s>     %511	
 	
 51!	
 	
r(   c                     t         j                  }t        | |      }|r|st        t        j
                  j                  t        t        j                  j                  t        d      t        d            t        t        j                  j                  |t        d                  }t        t        j                  j                  |t        d            }|rHt        t        j                  j                  |      }t        t        j                  j                  |      }||fS t        t        d      t        d      |      }t        t        t        t        j                  j                  t        d      ||      t        d      |      |rt        j                  j                  nd       }	t        t        t        t        j                  j                  ||      t        d      |      |rt        j                  j                  nd       }
|	|
fS )NrJ   r  r  r`   r  r  )r   r!   rA   r   r6   r  r.   r  r   r   r   relur:   r   r  rx  )_dequant_weight_patternr  r  
relu_fusedr   r@   pattern_no_biaspattern_with_biasx_pattern_with_reshaper  r  s              r&   %_generate_linear_dynamic_fp16_patternr    s    MME*+BEJI%5 'HH##3,-
 ##,-
 )HHOOsO

  ,TYY->->@Q R*499+<+<oNO /11?3%&
 #>*

""3&	 ,-!		
 (		T# &A*&
 ,-!	
 (		T&" '(FFFr(   c                  0   t        j                  t        j                  t        j                  gddgddg      } | D ]X  \  }}}t        t        t        t        |      t        d      |t        j                  k(        t        d      |      d|       Z y )	NTFrS   r  r  )r9   r   r   r   )
	itertoolsproductr   r!   r"   r  r:   r2   rU   r   )dequant_pattern_casesr   r  rS   s       r&   _register_dequant_promotionr    s    %--	'$u =R #
8$&8* 	).8@+= 34U^^+ -.2
 	
+#
r(   c                      t         j                  t         j                  fD ]"  } t        |       }|D ]  }t	        |d|         $ y )Nr   r  )r   r!   r"   r  r  )r   weight_prepack_patternsweight_prepack_patterns      r&   _register_qconv_weight_prepackr#  H  sH    --0 "I%"P&= 	"/&AU	r(   c                     t        j                  t        j                  t        j                  gddgddg      } | D ]*  \  }}}t        |||      }|D ]  }t        |d||        , t        j                  t        j                  t        j                  gddgddg      D ]*  \  }}}t        |dd||      }t        ||rdnd|dd       , y )	NTFr  r   )r   r   r  r  r   )r   r   r  r  )r  r  r   r!   r"   r  r  )linear_weight_prepack_casesr   r  rS   r!  r"  r  bmm_patterns           r&    _register_qlinear_weight_prepackr'  R  s   F #,"3"3	'$u#
 =X 8$&8"K!1#

 '> 	"1&&;		* 1:0A0A	'$u1 
,y, @"&"1
 	. "&"	

r(   c                 \    dt         fd}t        | ||      dt         ffd       }y )Nr   c                 B    | j                   d   t        j                  k(  S )N
dtype_fp16)r   r   float16r   s    r&   _extra_check_fnzJ_register_linear_dynamic_fp16_weight_prepack_pass.<locals>._extra_check_fn  s    ||L)U]]::r(   r   c                    |d   }|d   }d|v r|d   nd}t         j                  j                  t         j                  j                  t         j                  j                  g}g }|D ]'  }|j                  t        | j                  |             ) t        |      dk(  sJ |d   }	t        |	t        j                  j                  j                        sJ |	j                  t         j                  j                  u rdnd}
|
dz   }d} r@| j                         }t        |t        j                  j                  j                        sJ d\  }}}}}d}rrd|	j                   |
   }|	j                   |   }t#        t%        |	j&                              }|j                  t         j(                  j                  u sJ |	j                   |
   }|	j                   |   }t        |t        j                  j                  j                        sJ |j                   d   }|rUt#        t%        |	j&                              }|j                  t         j*                  j,                  u sJ |	j                   |   }t        |t        j                  j                  j                        sJ |j                   d   }t        |t        j                  j                  j                        r&|j                  t.        j0                  j2                  u sJ |j                   d   }t        |t        j                  j                  j                        r&|j                  t.        j0                  j2                  u sJ |j4                  j7                  d      j8                  }t;        |      rd}| j<                  }|j?                  |	      5  ||f}t        j@                  jB                  jD                  }|jG                  ||	      }|||f} r.t        j@                  jB                  jH                  j                  n-t        j@                  jB                  jJ                  j                  }|jG                  ||	      }| j                         }|jM                  |       |j4                  jO                  |j4                         ||jQ                  |       ||jQ                  |       ||jQ                  |       |jQ                  |	       |At        |t        j                  j                  j                        sJ |jQ                  |       |At        |t        j                  j                  j                        sJ |jQ                  |       |At        |t        j                  j                  j                        sJ |jQ                  |       |jQ                  |       |jQ                  |       |jQ                  |       tR        d
   dxx   dz  cc<   tR        d
   dxx   t        | j                        z  cc<   ddd       y# 1 sw Y   yxY w)a-  
        Match the pattern:
        fp32 activation
          |
        mm/addmm <- t <- to_fp32 <- to_fp16 <- weight
          |
        (reshape) <- (relu)

        OR

        fp32 activation
          |
        expand
          |
         bmm <- expand <- t <- to_fp32 <- to_fp16 <- weight
          |
        (add) <- (relu)

        Insert weight prepack node and change the pattern to:
        fp32 activation
          |
        onednn.linear_dynamic_fp16 <- onednn.linear_prepack_fp16 <- weight
        (or onednn.linear_relu_dynamic_fp16)
        rJ   wr`   Nr   r   )NNNNNr  r  r   r  r  )*r6   r  r.   rx  r  extendr   r   r   r   r   r   r<  r   r   r$   r   r   r   r1   r7   r   r   rP   r-   no_fuser   r   rQ  r   r  r  r5   rj   linear_prepack_fp16r  linear_relu_dynamic_fp16linear_dynamic_fp16r  r  r  r   )!r   r   r   rJ   r.  r  nodes_to_findr  r<  r  r  r  	relu_noder  r  expand_x_nodeexpand_w_nodeadd_bias_noder  w_to_fp32_nodew_to_fp16_noderm  r  r  r  r  r  	linear_opr  out_noder  r  r  s!                                 r&   "linear_dynamic_fp16_weight_prepackz]_register_linear_dynamic_fp16_weight_prepack_pass.<locals>.linear_dynamic_fp16_weight_prepack  s_   > 3K3K!Vmvc{ ++TWW__dhh>N>NO! 	ADU[[$ ?@	A< A%%%"1o+uxx}}'9'9:::&--1C1CCa"Q 	))+Ii););<<< +	
 #.#3#3K#@ $)),7&*40A0A+B&C#*11T\\5I5IIII + 0 0 = + 0 0 >!-1C1CDDD&++A.$(k.?.?)@$AM(//488??BBB %%l3F&%((--"4"4555Q~uxx}}'9'9:%%#88@@A	
A (,,Q/~uxx}}'9'9:%%#88@@A	
A
 &&**]+11G$G##K0 3	 $   %yy//CC"'"5"5 '; #6 # #)H  		  99AAYY%%99AA 
 $11)(1KO((*H**?;   ''6$  +".  !45(  /[)+!"2EHHMM4F4FGGG  !12(!-1C1CDDD  /(!-1C1CDDD  /V$^,^,Z !GHAMHZ !GHCM Hc3	 3	 3	s   :I9X<<Y)r   r   )r/   r   r  r  r  r,  r=  s     ```  r&   1_register_linear_dynamic_fp16_weight_prepack_passr>    s;    ;u ; %#
O% O
Or(   c            
      h   t         j                  j                  j                  j                  } t        | t        | t        d      t        d            t        d            }t        j                  ddgddgddg      }|D ]/  \  }}}t        ||||      }|D ]  }t        ||rdnd|||        1 y )	Nr.  r*  
dtype_fp32FTr   r   )r   r  r  r  )r   r5   rP   r-   r0  r   r   r  r  r  r>  )to_dtype_opweight_patterncasesr  r  r  patternsr/   s           r&   ,_register_linear_dynamic_fp16_weight_prepackrE  E	  s    ))00EEMMK!sO|$	

 	< N 		u	E
 @E ;/8!	
   	G=!+A&;!1%	r(   c                     ddt         dt         fd} d } | | d            }t        t        j                  j                  |t        d            } | | d            }t        t        j                  j                  t        t        j                  j                  t        t        j                  j                  |t        d            t                     t        d	            } | dd
      } | dd
      }dt        fd}|d|d|d|d|d|di}	|	j                         D ]   \  }
}t        |
||      dt        fd       }" y)z
    The pattern is:
      (no bias) reshape -> _int_mm -> convert_element_type -> (expand ->) mul -> mul -> reshape
    or
      (with bias) pattern_no_bias -> add (-> reshape -> reshape)
    Texpand_a_scale	reshape_ac                 Z   t        t        j                  j                  t        t        j                  j                  t        t        j
                  j                  t        t        j                  j                  |r7t        t        j                  j                  t        d      t        d            n
t        d      t        d            t        d            | r6t        t        j                  j                  t        d      t                     n
t        d            t        d            S )Nain_shaper`   r   rK   rW   )r   r6   rw  r   r,   r-   r.   _int_mmr7   r   r  r
   rG  rH  s     r&   get_pattern_no_biaszB_register_smooth_quant_int_mm_pattern.<locals>.get_pattern_no_biasp	  s    HHOO..66 ,, % % LL00&sO&z2 (_"3
 w'* & !++"9- $I.16 y!;
 	
r(   c                 ^    t        t        j                  j                  | t	        d            S )Nout_shape_no_bias)r   r6   r7   r.   r   )r/   s    r&   _with_outer_reshapezB_register_smooth_quant_int_mm_pattern.<locals>._with_outer_reshape	  s&    LL  ':6I+J
 	
r(   F)rG  r  out_shape_with_biasrM  r   c                    t        | j                        dvryt        | j                  t        j                  j
                        d   }t        |j                  d   t        j                  j                  j                        sy|j                  d   j                  dk7  ryt        | j                        dk(  r:| j                  d   j                  d   | j                  d   j                  d   k7  ryt        | j                        dk(  sKt        | j                        d	k(  r| j                  d   j                  t        j                  j                  u rt        | j                        dk(  rd	nd}| j                  |   j                  d   }t        |t        j                  j                  j                        syt        |j                   j#                  d
      j$                        dk7  ryy)N)r           
   Fr   r   get_attrrW  r   rU  rV  r  T)r   r   r   r6   rL  r.   r   r   r   r   r<  r   r  r   r   r   r   r   rQ  )r   aten_int_mm_nodebias_idx	bias_nodes       r&   _validate_patternz@_register_smooth_quant_int_mm_pattern.<locals>._validate_pattern	  sn   u{{#33'T\\5I5IJ1M*//2EHHMM4F4FG  #&&*4u{{r!{{1~""1%Q)<)<Q)??u{{r!!ekk!n&;&;txx&N,2qHH-2215Ii););<9>>%%m4::;q@r(   r   r   r   r   c                 	   |j                  dd       }|d   }|d   }|d   }|d   }|d   }|j                  j                  d      j                  }	t        |	      rd }	| j	                         }
| j
                  j                  |
      5  | j
                  j                  t        j                  j                  |dd	gf
      }| j
                  j                  t        j                  j                  |f
      }||	f}t        j                  j                  j                  }| j
                  j                  ||
      }d }| j
                  j                  t         j"                  j                  |t        j$                  f
      }|j                  j                  d      j                  }d}t        |      sd}|D ]  }||z  }	 |dk(  }|r|||||||dd	|dg df}| j
                  j                  t        j                  j                  j&                  j(                  |
      }|
j+                  |       |j                  j-                  |
j                         n|j                  dd       }||}n7| j
                  j                  t        j.                  j                  ||f
      }|dd	|||d dd	|dg df}| j
                  j                  t        j                  j                  j&                  |
      }| j
                  j                  t        j0                  j2                  ||f
      }|j                  dd       d uxs |j                  dd       d u}|r|j                  d|d         }|q| j
                  j                  t        j4                  j2                  ||f
      }|rq| j
                  j                  t        j.                  j                  |f
      }n9|r7| j
                  j                  t        j.                  j                  |f
      }|
j+                  |       |j                  j-                  |
j                         t7        | j8                        D ]  }| j
                  j;                  |        t<        d   dxx   dz  cc<   t<        d   dxx   t?        | j8                        z  cc<   d d d        y # 1 sw Y   y xY w)Nr  rJ  r`   r   rK   rW   r  r   r   r  Fr  r  r  rK  rR  rP  r   r  r  ) r   r   rQ  r   r$   r  r  r  r6   r>   r.   
contiguousr   r5   rj   r  r,   r-   r!   rz   rR   r  r  r7   rw  r   r   reversedr   r  r   r   )r   r   r   r  rJ   r^  r   rK   rW   rm  r<  transpose_nodecontig_noder  r  r  dummy_zpx_scale_shapex_scale_is_scalarproddr  r  rK  
x_reshapednew_out_nodehas_outer_reshapern  r<  s                                r&   _int_mm_weight_prepackzE_register_smooth_quant_int_mm_pattern.<locals>._int_mm_weight_prepack	  s    ::fd+DsAC[F7OEY'GY'Gffjj/55G(((*H--h7 u!&!:!:LL((A/? "; " $kk77OO++>2C 8   ($ $)99#3#3#C#C &+kk&?&?$+? '@ '#  ++33..66gu}}=U 4  !( 0 0 ? E E$)!'6D* "	"(,	% %  +  H ',kk&?&?		((::AA '@ 'O 22?C#((//>  &zz*d;H'%&
%*[[%>%> LL008} &? &
 #+  H ',kk&?&?		((:: '@ 'O $);;#<#<.H $= $L 

#8$?tK M!::&94@L &
 )$*JJ16:M3N%	 '',{{'@'@ HHOO<2F (A ( -+0;;+D+D $ 4 4&2I%> ,E ,L
 -+0;;+D+D $ 4 4&2I%> ,E ,L 22<@ %%,,X]];$U[[1 1DKK**401$%KLPQQL$%KLPSKKQ Lgu u us   	P9SSNT)boolr   r6   r   r   r   r7   r.   r
   r   itemsr   )rN  rQ  pattern_no_bias_1pattern_with_bias_1pattern_no_bias_2pattern_with_bias_2%pattern1_with_no_outer_or_act_reshape%pattern2_with_no_outer_or_act_reshaper\  pattern_to_pass_numberr/   r   rj  s                r&   %_register_smooth_quant_int_mm_patternru  f	  sd   
D 
T 
B
 ,,?u,UV&6 ,,?t,TU&LL  !6"
 E	
 	()& -@-) -@u-) 6 	1Q1Q-q-q !7 < < > I	()#


B	% B	


B	Ir(   c                   ,    e Zd Z	 	 	 	 	 ddededdfdZy)
PostOpAttrNrr   rt   returnc                 n    || _         |r|nd| _        || _        |r|ng | _        |r|| _        y d| _        y )Nr  r  )rr   rs   rt   scalars_attralgorithm_attr)selfrr   rs   rt   rz  r{  s         r&   __init__zPostOpAttr.__init__h
  s<     -#U
*,8Lb0>nBr(   )r  Nr  NN)__name__
__module____qualname__strr}  r   r(   r&   rw  rw  g
  s8     %#GG 	G 
Gr(   rw  c                 z    j                   dk7  t        | t              |      dt        ffd       }|S )Nr  r   r   c                 h   |d   |d   |d   }}}|d   |d   |d   }}}|d   |d   |d	   |d
   |d   f\  }	}
}}}t        |       }|t        j                  t        j                  t        j                  t        j
                  fv sJ |t        j                  k(  s|t        j                  k(  r|d   nd}|t        j                  k(  s|t        j                  k(  r|d   nd}|d   dk(  sJ j                  dk(  r+|j                  d      }|j                  d      }||g_        | j                         }| j                  j                  |      5  s2|||||||	|
||||||j                  j                  j                  f}n|t        j                  t        j                  fv r|d   n|d   }|t        j                  t        j                  fv r|d   nd}|t        j                  t        j                  fv r|d   nd}||||||||	|
||||||||j                  j                  j                  j                  j                  f}| j                  j                  |      }|j!                  |       |j"                  j%                  |j"                         t'        | j(                        D ]  }| j                  j+                  |        	 d d d        rdnd}rdnd}t,        d   |xx   dz  cc<   t,        d   |xx   t/        | j(                        z  cc<   y # 1 sw Y   QxY w) NrJ   rK   rL   r_   rW   rX   r`   ra   rb   rc   rd   r   r  r   r   rg   r  hardtanh	min_value	max_valuero   r   rp   r   r  qconv2d_binary_matcher_countqconv2d_unary_matcher_countqconv2d_binary_matcher_nodesqconv2d_unary_matcher_nodesr   r   )r'   r   r   r    r!   r"   rt   r   rz  r$   r  r  r{  rr   rs   r  r  r   r  r_  r   r  r   r   )r   r   r   rJ   rK   rL   r_   rW   rX   r`   ra   rb   rc   rd   r%   r   r   r  r  r<  r   ro   rp   r   r  r<  	count_key	nodes_keyr   r   post_op_attrs                               r&   r   z2_register_qconv_post_op_fusion_pass.<locals>.qconv
  s    3K96N 7 ?#96N !%w 3K89:80
,67Hf 17

EKKWWWW +|uzz/I =! 	 +|uzz/I 6N 	 =!V+	
+%%3

;/I

;/I)2I(>L%$$&[[))(3 C	-%!   .. -- //#5 , $UZZ'@@ 7O 56  $UZZ'@@ =)  $UZZ'@@ :&  !   // && .. -- //-$ 0 "KK55%5 6 M **=9%%hmm4 - -&&t,-EC	-L " +. 	 " +. 	
 	Y'1,'Y'3u{{+;;'_C	- C	-s   FL((L1)rr   r   r   r   )r/   r   r   r  r   r   s     `` @r&   #_register_qconv_post_op_fusion_passr  w
  sN     &44>$:;MN
z<U z<
z<x Lr(   c                     ddl m} m}m} t        j
                  t        j                  fD ]  }|t        j                  k(  }t        dd dg d      t        t        d            t        dd dg d      t        t        t        d      t        j                  j                              t        dd dg d      t        t        |t        d      d|      |      t        dd dg d      t        t        | t        |rdnd	      d	|      |      t        dd d
g d      t        t        |t        |rdnd	      d	|      |      i}|j                         D ]?  \  }}t!        |dt        j"                  j$                  j&                  j                  |       A t        dd dg d      t        t        d      t        j                  j                        t        dd dg d      t)        t        |t        d      d|      t+               |      t        dd dg d      t)        t        | t        |rdnd	      d	|      t+               |      t        dd d
g d      t)        t        |t        |rdnd	      d	|      t+               |      i}|j                         D ]?  \  }}t!        |dt        j"                  j$                  j&                  j                  |       A  y )Nr   )_hardswish_fusion_hardtanh_fusion_silu_fusionr  r  r  r  r0   	hardswishr   swishr  r  )r   r  r  r  r   r!   r"   rw  r   rm   r   r6   r  r.   rH   rm  r  r5   rj   rk   r2   r
   )	r  r  r  original_pattern_output_dtyperF   conv_unary_replace_patterns
unary_attrrD  %conv_unary_replace_float_out_patternss	            r&   _register_qconv_unary_fusionr    s   PP*/--)H p% 05>>Afb"1(+ fb"1+,Q/1B1B
 j"b1%$,Q/	 $+ k2r1%%,'QqA	 $+ gr21% ,'QqA	 $+K.'
#` %@$E$E$G 	 J/		  22::		 vtVR46Q(+TYY->->7 j"b7%$,Q/	 	 k2r7%%,'QqA	 	 gr27% ,'QqA	 	=(1
-T %J$O$O$Q 	 J/		  22::		Spr(   c                     dD ]  } ddg}i }|D ]  }|j                  t        dddg d      t        t        t        j
                  j                  t        d      t        | |	            t        ddd
g d      t        t        t        t        j
                  j                  t        d      t        | |	      t        j                  j                              i        |j                         D ]?  \  }}t        |dt        j                  j                   j"                  j$                  |       A i }|D ]|  }|j                  t        ddd
g d      t        t        t        j
                  j                  t        d      t'        d      | |	      t        j                  j                        i       ~ |j                         D ]|  \  }}| r;t        |dt        j                  j                   j"                  j$                  |       Ct        |dt        j                  j                   j"                  j$                  |       ~ i }|D ]Z  }|j                  t        dddg d      t        t        j
                  j                  t        d      t'        d      | |	      i       \ |j                         D ]C  \  }}t        || rdndt        j                  j                   j"                  j$                  |       E  y )Nr  FTr   r  r  r  r   )r   r  r  r   r  rT  )r  rw  r   r   r6   r   r   rm   dequantize_accum_patternr   r  r.   rm  r  r   r5   rj   rk   rw   r   ) int8_mixed_bf16_with_inplace_addswap_binary_inputs_listbinary_replace_patternsr   binary_unary_attrrD  !binary_replace_float_out_patternss          r&   _register_qconv_binary_fusionr  y  s   ,9 j(#($-"$2 	K#**sFB94 HHOO4Q74<(3 sFB938 $ 8 ; 8 @,7 !II--		> ,C+H+H+J 	'x/		  2299!		 -/)2 	K-44uc62r:<W4 HHOO4Q7&'<=<(3 		))	=	& /446	 
/3II$$66==%	 4II$$66==%		( -/)2 	K-44sFB303"#898$/
	$ /446		 
/51		  2299!				Cjr(   c                 x    j                   dk7  t        | t              |      dt        ffd       }y )Nr  r   r   c                 V   t        |       }|d   |d   |d   }}}|d   |d   |d   }	}}d|v r|d   nd}
|t        j                  t        j                  fv r|d	   nd
}|t        j                  t        j                  fv r|d   nd}|d   dk(  sJ | j	                         }| j
                  j                  |      5  s.||||||	|
|||j                  j                  j                  f}nXd|v r|d   n|d   }d
}d}||||||	||
|||||j                  j                  j                  j                  j                  f}| j
                  j                  |      }|j                  |       |j                  j                  |j                         t!        | j"                        D ]  }| j
                  j%                  |        	 ddd       rdnd}rdnd}t&        d   |xx   dz  cc<   t&        d   |xx   t)        | j"                        z  cc<   y# 1 sw Y   QxY w)z>
        Match the pattern:
        qlinear - post op
        rJ   rK   rL   r_   rW   rX   r`   Nr   r  r   r   rg   r  r   ro   r  qlinear_binary_matcher_countqlinear_unary_matcher_countqlinear_binary_matcher_nodesqlinear_unary_matcher_nodesr   r   )r'   r   r    r   r$   r  r  rt   rz  r{  rr   rs   r  r  r   r  r_  r   r  r   r   )r   r   r   r%   rJ   rK   rL   r_   rW   rX   r`   r   r   r<  r   r   r   r   r  r<  r  r  r   r   r  s                         r&   qlinear_post_op_fusionzE_register_qlinear_post_op_fusion_pass.<locals>.qlinear_post_op_fusion  s    17 3K96N 7 ?#96N !%w &=F3Kd
 ejj 99 =! 	  ,UZZ/HHF6Nq 	 =!V+	
+ $$&[[))(3 /	-%!   .. -- //5   ,3f+<w&/!   // && .. -- //%$ ( $kk77%5 8 O **?;  ''6 - -&&t,-]/	-d " +. 	 " +. 	
 	Y'1,'Y'3u{{+;;'w/	- /	-s   =DHH()rr   r   r   r   )r/   r   r   r  r  r   s     `` @r&   %_register_qlinear_post_op_fusion_passr    sF     &44>$<=OP
_<e _<
_<r(   c                     ddl m} m} t        j                  t        j
                  fD ];  }|t        j
                  k(  }dD ]  }t        |      }|r.t        j                  j                  j                  j                  n-t        j                  j                  j                  j                  }t        dd dg d      t        |      t        dd dg d      t        t        |t        j                   j                              t        dd dg d      t        t#        | t        ||rdnd      d|      |	      t        dd dg d
      t        t#        |t        ||rdnd      d|      |	      i}|j%                         D ]  \  }}	t'        |	d||        t        dd dg d      t        |t        j                   j                        t        dd dg d      t)        t#        | t        ||rdnd      d|      t+               |      t        dd dg d
      t)        t#        |t        ||rdnd      d|      t+               |      i}
|
j%                         D ]  \  }}	t'        |	d||        " > y )Nr   )_gelu_fusion_1_gelu_fusion_2r  r  r  r  gelur   r  tanhr  r  )r   r  r  r   r!   r"   r~   r5   rj   rz   rR   r.   rw  r   r   r6   r  rH   rm  r  r2   r
   )_gelu_fusion_erf_gelu_fusion_tanhr  rF   r|   r
  r   linear_unary_replace_patternsr  rD  'linear_unary_replace_float_out_patternss              r&   _register_qlinear_unary_fusionr  V  s   
 +0--)H b%/5>>A&3 `	"67MNO * 		  2299YY%%77??  D&"b5# D&"b5/ARARS D&"f5)(02Aa  (/
 D&"f5))02Aa  (/
5%-)N )F(K(K(M $
H5"	 64R8:U#TYY%6%6; D&"f;)(02Aa  E D&"f;))02Aa  E) 73D )P(U(U(W $
H5"	u`	br(   c                  j   dD ]  } | r.t         j                  j                  j                  j                  n-t         j                  j                  j                  j
                  }ddg}dt        j                  j                  d}ddg}ddg}ddg}t        j                  ||||      }i }|D ]z  \  }	}
}}|
s|r|j                  t        dd	|	g d
      t        t        t        t        j                   j"                  t%        |       t'        d      ||      ||	               i       | |j)                         D ]  \  }}t+        |d||        i }|D ]|  }|j                  t        dd	dg d
      t        t        t        j                   j"                  t%        |       t'        d      d|      t        j                  j                        i       ~ |j)                         D ]  \  }}t+        |d||        i }|D ]|  }|j                  t        dd	dg d
      t        t        t        j                   j"                  t%        |       t'        d      d|      t        j                  j                        i       ~ |j)                         D ]  \  }}t+        |d||        i }|D ]Z  }|j                  t        dd	dg d
      t        t        j                   j"                  t%        |       t'        d      d|      i       \ |j)                         D ]  \  }}t+        |d||        i }|D ]Z  }|j                  t        dd	dg d
      t        t        j                   j"                  t%        |       t'        d      d|      i       \ |j)                         D ]  \  }}t+        |d||         y)a
  
    Supported linear-binary(-unary) patterns

        linear(X)   extra input
               \   /
                Add
                 |
            Optional(relu)
                 |
                 Y

    1. int8-mixed-fp32
    +---+---------------+-----------+------------------------------+---------+
    | # | Add type      | Quant out | Pattern                      | Post op |
    +---+---------------+-----------+------------------------------+---------+
    | 1 | In-/out-place | Yes       | linear + fp32 -> (relu) -> q | add     |
    +---+---------------+-----------+------------------------------+---------+
    | 2 | In-/out-place | No        | linear + fp32 -> (relu)      | sum     |
    +---+---------------+-----------+------------------------------+---------+

    2. int8-mixed-bf16
    +---+----------+---------------+-----------+-----------------------------------------+---------+
    | # | X2 dtype | Add type      | Quant out | Pattern                                 | Post op |
    +---+----------+---------------+-----------+-----------------------------------------+---------+
    | 1 | BF16     | In-/out-place | Yes       | linear + bf16 -> (relu) -> q            | add     |
    +---+----------+---------------+-----------+-----------------------------------------+---------+
    | 2 | BF16     | In-/out-place | No        | linear + bf16 -> (relu)                 | sum     |
    +---+----------+---------------+-----------+-----------------------------------------+---------+
    | 3 | FP32     | Out-place     | Yes       | linear + fp32 -> (relu) -> q            | add     |
    |   |          | In-place right|           |                                         |         |
    +---+----------+---------------+-----------+-----------------------------------------+---------+
    | 4 | FP32     | Out-place     | No        | linear + fp32 -> (relu)                 | sum     |
    |   |          | In-place right|           |                                         |         |
    +---+----------+---------------+-----------+-----------------------------------------+---------+
    | 5 | FP32     | In-place left | Yes       | linear + fp32 -> to_bf16 -> (relu) -> q | add     |
    +---+----------+---------------+-----------+-----------------------------------------+---------+
    | 6 | FP32     | In-place left | No        | linear + fp32 -> to_bf16 -> (relu)      | add     |
    +---+----------+---------------+-----------+-----------------------------------------+---------+

    Note
    (1) The positions of linear and the extra input can be swapped.
    (2) we don't insert q-dq before the extra input of linear-add by recipe. But if q-dq is found at the
    extra input, we don't match that pattern because we cannot match all these patterns in 3 passes.
    r  r  r  N)r  r  FTr   r  r  r   )r   r   r  r   ro   r  rT  )r   r5   rj   rz   r   rw   r6   r  r.   r  r  r  rw  r   r   r   r   r   r~   r   rm  r  )r|   qlinear_binary_opunary_postop_listunary_postop_dictconvert_dtype_after_binary_listr  int8_mixed_bf16_listcombinationsqlinear_binary_replace_patternsunary_opint8_mixed_bf16r   cvt_dtype_binaryr  rD  r  swap_binary_inputss                    r&   _register_qlinear_binary_fusionr    s(   Z #0 i & II..<<!!33:: 	
 $V,II%%
 ,1$-'
 $)$- %t} (( #+	
 +-'HT 	DHo{4D"'7+22sHb"938 $ 89O P *7 3 /?,7 .h7		0 ,K+P+P+R 	'x1!!		 -/)"9 	-44uc62r:<W4 HHOO45KL&w/*/(: 		))	=	$ /446		 
1!!				 -/)"9 	-44uc62r:<W4 HHOO45KL&w/*.(: 		))	=	$ /446		 
1!!				 -/)"9 	-44sFB301GH"7+&+$6
	" /446		 
1!!				 -/)"9 	-44sFB301GH"7+&*$6
	" /446		 
1!!				Air(   c                  
   t                t                t                t                t	                t
        j                  j                  j                         s)t                t                t                t                y y r4   )r  r#  r'  rE  ru  r   r5   mkldnn_is_mkldnn_acl_supportedr  r  r  r  r   r(   r&   '_register_quantization_weight_pack_passr    s]      ! #$ %&02 *+ 99446$&%'&(') 7r(   graph_modulec                 6  
 d }| j                   j                  D ]  }|j                  dk(  s|j                  t        v s't        |j                        dk(  s@ ||j                  d         sV|}|j                  d   d}|}|j                  d   
 |
      r5t        
j                        dk7  rd}n
}|j                  d   
 |
      r5|st        
j                        dk(  s|j                         | j                   j                  |      5  | j                   j                  |      }
j                  |       dt        j                  j                  dt        j                  j                  f
fd	}t        |j                  |      }t        |j                   |      }	||_        |	|_        | j                   j#                  |       d
d
d
        | j                   j%                          | j'                          y
# 1 sw Y   xY w)a  
    Lift up the quant node before view like nodes. It can benefit performance
    of Attention like block. For example, we have the pattern as:

             DQ
    DQ       LINEAR
    LINEAR   VIEW
    VIEW     PERMUTE
    PERMUTE  TRANSPOSE
    Q        Q
    DQ       DQ
       Matmul
        DIV
        ADD
      SOFTMAX

    We want to lift up the the quant nodes from matmul before view like nodes
    as the output of Linear node.

             DQ
    DQ       LINEAR
    LINEAR   Q
    Q        VIEW
    VIEW     PERMUTE
    PERMUTE  TRANSPOSE
    DQ       DQ
       Matmul
        DIV
        ADD
      SOFTMAX

    It produces a DQ->LINEAR->Q pattern which can be fused by backend.
    c                 H    | j                   dk(  xr | j                  t        v S )Nr  )r  r   	_VIEW_OPS)r<  s    r&   
is_view_opz!quant_lift_up.<locals>.is_view_op  s    ww/)FdkkY.FFr(   r  r   r   TFnrx  c                     | k(  rS | S r4   r   )r  
input_nodeinput_node_of_quants    r&   maybe_replace_nodez)quant_lift_up.<locals>.maybe_replace_node  s     33#--#$Hr(   N)r  r   r  r   _PER_TENSOR_QUANTIZE_OPSr   all_input_nodesr   r1   r  r  	node_copyr   r   r   r   r   r  lint	recompile)r  r  r<  
quant_nodecould_lift_upcurrent_nodenew_quant_noder  r  
new_kwargsr  r  s             @@r&   quant_lift_upr    s   FG ""(( /> GG&77D(()Q.4//23J",//!"4 !M%L%**1-JZ(z''(A-$)M))..q1
 Z( Z%5%5!6!!;001DE!''88F >%1%7%7%A%A*%MN44^D%ehhmm % %  '~':':<NOH!()>)>@R!SJ*2N',6N) &&11*=> >A/>b %> >s   B=HH	)F)r   )FFrk  )FTF)r  	functoolsr  r5  r  typingr   r   torch._dynamo.utilsr   %torch.fx.experimental.symbolic_shapesr   torch.fx.noder   loweringr   r   r	   pattern_matcherr
   r   r   r   r   r   utilsr   freezing_patternsr   	post_gradr   r5   r6   r,   rP   r#  r   r.   rR   r  	transposeintr>   viewr  r'   r2   r:   rA   rH   rU   r  r  r  r  r  r  rm   rx   r~   r   rQ   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r+  r>  rJ  rL  rN  rV  rY  rd  rt  r|  r~  r  r  r  r  r!   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#  r'  r>  rE  ru  rw  r  r  r  r  r  r  	lru_cacher  r   GraphModuler  r   r(   r&   <module>r     s          ( B ! < X X   > 0 yy~~		yy55 II	 ,,44,,33  	NNLLII	"U  5T >AUQU "*4 )5//77zyvx}}y	) % --'( . /;JJ)_-/ + 7CJJ1_-7 30:2< (..66w}zEE   6,". AH 8vM` #'K\K\
2
2-`;
|:$ 1=..66EEEEEE1 -	$	40Y2Y2Y2Y*%% /4mm +\ BG aTH"J EJMM L` ).,. 38-- ,,,"TJEV --bN --	+G` --	&MT --
2  	BGJ'
TO
j  ^BB~BG G IXslk\l<^hVVr T* *.X 4 4 Xr(   