
    AVh              	          d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 dd	l	mZ dd
l	mZ ddl	mZ ddl	mZ ddl	mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ  ed      ej:                  d5d              Z ed      ej:                  	 	 	 d6d              Z ed      ej:                  	 	 	 	 d7d              Z  ed      ej:                  	 	 	 	 d7d              Z! ed      ej:                  	 	 	 d6d              Z" ed       ej:                  	 	 	 d6d!              Z#d" Z$ ed#g $      ej:                  d8d%              Z% ed&g$      ej:                   ejL                  dd'd(      	 	 d9d)                     Z' ed#g$      ej:                  	 	 d:d*              Z( ejR                  ejT                        	 	 	 	 d;d+ejV                  fd,       Z, ed-      ej:                  	 	 	 	 	 d<d.              Z- ejR                  ej\                        	 	 	 d=d/e/d+ej`                  ejV                  ejb                  ejd                     f   fd0       Z.d5d1Z3d2 Z4ejj                  fd3Z6 ejR                  ejn                        	 	 d>d+ejb                  ejd                     fd4       Z7y)?z2Ragged operations for working with string Tensors.    N)constant_op)dtypes)ops)tensor)tensor_util)	array_ops)array_ops_stack)cond)gen_string_ops)map_fn)
string_ops)ragged_array_ops)ragged_functional_ops)ragged_math_ops)ragged_tensor)compat)deprecation)dispatch)	tf_exportzstrings.bytes_splitc                    t        j                  |d| g      5  t        j                  | d      } t	        | t        j
                        r-| j                  t        | j                              cddd       S | j                  j                  }|t        d      |dk(  r+t        t        j                  | g            d   cddd       S |dk(  rRt        j                  | dd	
      \  }}}t        j
                  j!                  ||dddf   |d   d	      cddd       S t        t        j
                  j#                  |             cddd       S # 1 sw Y   yxY w)a  Split string elements of `input` into bytes.

  Examples:

  >>> tf.strings.bytes_split('hello').numpy()
  array([b'h', b'e', b'l', b'l', b'o'], dtype=object)
  >>> tf.strings.bytes_split(['hello', '123'])
  <tf.RaggedTensor [[b'h', b'e', b'l', b'l', b'o'], [b'1', b'2', b'3']]>

  Note that this op splits strings into bytes, not unicode characters.  To
  split strings into unicode characters, use `tf.strings.unicode_split`.

  See also: `tf.io.decode_raw`, `tf.strings.split`, `tf.strings.unicode_split`.

  Args:
    input: A string `Tensor` or `RaggedTensor`: the strings to split.  Must
      have a statically known rank (`N`).
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` of rank `N+1`: the bytes that make up the source strings.
  StringsByteSplitinputnameNz(input must have a statically-known rank.r       F)	delimiter
skip_emptyvaluesvalue_rowidsnrowsvalidate)r   
name_scoper   "convert_to_tensor_or_ragged_tensor
isinstanceRaggedTensorwith_flat_valuesstring_bytes_splitflat_valuesshapendims
ValueErrorr	   stackr   string_splitfrom_value_rowidsfrom_tensor)r   r   rankindicesr    r+   s         ^/home/dcms/DCMS/lib/python3.12/site-packages/tensorflow/python/ops/ragged/ragged_string_ops.pyr)   r)   (   sH   2 ~~d.8 O<<UBIKE%334##$6u7H7H$IJ	O O ;;D|ABBqy 5 5ug >?BO O 
-::
2% 1gvu''99gadm58 : O O&   : : F Fu MN'O O Os    AE8A	EAE"'EEzstrings.unicode_encodec                    t        j                  |d| g      5  t        j                  |       }|j                  j
                  t        d      t        j                  |      r|j                  j                  j
                  dkD  r0|j                  t        |j                  |||            cddd       S |j                  dkD  r0|j                  t        |j                  |||            cddd       S t        j                  |j                  |j                  |||      cddd       S |j                  j
                  dk(  r4t        t        j                   j#                  |      |||      cddd       S |j                  j
                  dkD  r{t%        j&                  |t)        j*                  dt%        j                  |      d   g            }t        ||||      }t%        j&                  ||j                  dd       cddd       S |j                  j
                  dk(  rt        d	      t        j                   j-                  |t)        j*                  dt%        j                  |t.        j0                  
      d   g      d      }t        ||||      }	t%        j&                  |	g       cddd       S # 1 sw Y   yxY w)a  Encodes each sequence of Unicode code points in `input` into a string.

  `result[i1...iN]` is the string formed by concatenating the Unicode
  codepoints `input[1...iN, :]`, encoded using `output_encoding`.

  Args:
    input: An `N+1` dimensional potentially ragged integer tensor with shape
      `[D1...DN, num_chars]`.
    output_encoding: Unicode encoding that should be used to encode each
      codepoint sequence.  Can be `"UTF-8"`, `"UTF-16-BE"`, or `"UTF-32-BE"`.
    errors: Specifies the response when an invalid codepoint is encountered
      (optional). One of:
            * `'replace'`: Replace invalid codepoint with the
              `replacement_char`. (default)
            * `'ignore'`: Skip invalid codepoints.
            * `'strict'`: Raise an exception for any invalid codepoint.
    replacement_char: The replacement character codepoint to be used in place of
      any invalid input when `errors='replace'`. Any valid unicode codepoint may
      be used. The default value is the default unicode replacement character
      which is 0xFFFD (U+65533).
    name: A name for the operation (optional).

  Returns:
    A `N` dimensional `string` tensor with shape `[D1...DN]`.

  #### Example:

  >>> input = tf.ragged.constant(
  ...     [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]])
  >>> print(unicode_encode(input, 'UTF-8'))
  tf.Tensor([b'G\xc3\xb6\xc3\xb6dnight' b'\xf0\x9f\x98\x8a'],
            shape=(2,), dtype=string)
  UnicodeEncodeNz.Rank of input_tensor must be statically known.r   )input_valuesinput_splitsoutput_encodingerrorsreplacement_char   r   z'input_tensor's rank must be at least 1.out_typeFr#   )r   r$   r   r%   r+   r,   r-   	is_raggedr*   r(   unicode_encoderagged_rankwith_valuesr    r   
row_splitsr'   r1   r   reshaper	   r.   from_row_splitsr   int32)
r   r9   r:   r;   r   input_tensorflat_input_tensorflat_output_tensorragged_input_tensoroutput_tensors
             r4   rB   rB   X   s   P ~~dOeW5 64 CCEJL'GHH|,		!	!	'	'	-	-	1 ,,<33_f+-.64 64 ##a'''<..+-.64 64( ,,%,,%00+-/)64 646 
			!	!Q	&&&22<@V%57;64 64@ ##a' &--!!2y|'DR'H"IJL ,,=,24DF  !3\5G5G5LMQ64 64R ##q(BCC
 ,88HH!!IOOL6<<HKLN	 I 
 '':O'-/?A  3m64 64 64s-   BJ=85J=7-J=.AJ=;B
J=B$J==Kzstrings.unicode_decodec           	          t        j                  |d| g      5  t        | ||||d      cddd       S # 1 sw Y   yxY w)a  Decodes each string in `input` into a sequence of Unicode code points.

  `result[i1...iN, j]` is the Unicode codepoint for the `j`th character in
  `input[i1...iN]`, when decoded using `input_encoding`.

  Args:
    input: An `N` dimensional potentially ragged `string` tensor with shape
      `[D1...DN]`.  `N` must be statically known.
    input_encoding: String name for the unicode encoding that should be used to
      decode each string.
    errors: Specifies the response when an input string can't be converted
      using the indicated encoding. One of:
      * `'strict'`: Raise an exception for any illegal substrings.
      * `'replace'`: Replace illegal substrings with `replacement_char`.
      * `'ignore'`: Skip illegal substrings.
    replacement_char: The replacement codepoint to be used in place of invalid
      substrings in `input` when `errors='replace'`; and in place of C0 control
      characters in `input` when `replace_control_characters=True`.
    replace_control_characters: Whether to replace the C0 control characters
      `(U+0000 - U+001F)` with the `replacement_char`.
    name: A name for the operation (optional).

  Returns:
    A `N+1` dimensional `int32` tensor with shape `[D1...DN, (num_chars)]`.
    The returned tensor is a `tf.Tensor` if `input` is a scalar, or a
    `tf.RaggedTensor` otherwise.

  #### Example:

  >>> input = [s.encode('utf8') for s in (u'G\xf6\xf6dnight', u'\U0001f60a')]
  >>> tf.strings.unicode_decode(input, 'UTF-8').to_list()
  [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]]
  UnicodeDecodeFwith_offsetsNr   r$   _unicode_decoder   input_encodingr:   r;   replace_control_charactersr   s         r4   unicode_decoderW      sG    R ~~dOeW5 K5.&:J5EKK K K   4=z#strings.unicode_decode_with_offsetsc           	          t        j                  |d| g      5  t        | ||||d      cddd       S # 1 sw Y   yxY w)a  Decodes each string into a sequence of code points with start offsets.

  This op is similar to `tf.strings.decode(...)`, but it also returns the
  start offset for each character in its respective string.  This information
  can be used to align the characters with the original byte sequence.

  Returns a tuple `(codepoints, start_offsets)` where:

  * `codepoints[i1...iN, j]` is the Unicode codepoint for the `j`th character
    in `input[i1...iN]`, when decoded using `input_encoding`.
  * `start_offsets[i1...iN, j]` is the start byte offset for the `j`th
    character in `input[i1...iN]`, when decoded using `input_encoding`.

  Args:
    input: An `N` dimensional potentially ragged `string` tensor with shape
      `[D1...DN]`.  `N` must be statically known.
    input_encoding: String name for the unicode encoding that should be used to
      decode each string.
    errors: Specifies the response when an input string can't be converted
      using the indicated encoding. One of:
      * `'strict'`: Raise an exception for any illegal substrings.
      * `'replace'`: Replace illegal substrings with `replacement_char`.
      * `'ignore'`: Skip illegal substrings.
    replacement_char: The replacement codepoint to be used in place of invalid
      substrings in `input` when `errors='replace'`; and in place of C0 control
      characters in `input` when `replace_control_characters=True`.
    replace_control_characters: Whether to replace the C0 control characters
      `(U+0000 - U+001F)` with the `replacement_char`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `N+1` dimensional tensors `(codepoints, start_offsets)`.

    * `codepoints` is an `int32` tensor with shape `[D1...DN, (num_chars)]`.
    * `offsets` is an `int64` tensor with shape `[D1...DN, (num_chars)]`.

    The returned tensors are `tf.Tensor`s if `input` is a scalar, or
    `tf.RaggedTensor`s otherwise.

  #### Example:

  >>> input = [s.encode('utf8') for s in (u'G\xf6\xf6dnight', u'\U0001f60a')]
  >>> result = tf.strings.unicode_decode_with_offsets(input, 'UTF-8')
  >>> result[0].to_list()  # codepoints
  [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]]
  >>> result[1].to_list()  # offsets
  [[0, 1, 3, 5, 6, 7, 8, 9, 10], [0]]

  UnicodeDecodeWithOffsetsTrP   NrR   rT   s         r4   unicode_decode_with_offsetsr[      sH    r ~~d6@ J5.&:J5DJJ J JrX   zstrings.unicode_splitc           	          t        j                  |d| g      5  t        | |||dd      }t        t	        j
                  |d      |||      cddd       S # 1 sw Y   yxY w)a]  Splits each string in `input` into a sequence of Unicode code points.

  `result[i1...iN, j]` is the substring of `input[i1...iN]` that encodes its
  `j`th character, when decoded using `input_encoding`.

  Args:
    input: An `N` dimensional potentially ragged `string` tensor with shape
      `[D1...DN]`.  `N` must be statically known.
    input_encoding: String name for the unicode encoding that should be used to
      decode each string.
    errors: Specifies the response when an input string can't be converted
      using the indicated encoding. One of:
      * `'strict'`: Raise an exception for any illegal substrings.
      * `'replace'`: Replace illegal substrings with `replacement_char`.
      * `'ignore'`: Skip illegal substrings.
    replacement_char: The replacement codepoint to be used in place of invalid
      substrings in `input` when `errors='replace'`.
    name: A name for the operation (optional).

  Returns:
    A `N+1` dimensional `int32` tensor with shape `[D1...DN, (num_chars)]`.
    The returned tensor is a `tf.Tensor` if `input` is a scalar, or a
    `tf.RaggedTensor` otherwise.

  #### Example:

  >>> input = [s.encode('utf8') for s in (u'G\xf6\xf6dnight', u'\U0001f60a')]
  >>> tf.strings.unicode_split(input, 'UTF-8').to_list()
  [[b'G', b'\xc3\xb6', b'\xc3\xb6', b'd', b'n', b'i', b'g', b'h', b't'],
   [b'\xf0\x9f\x98\x8a']]
  UnicodeSplitFrP   r=   r9   r:   r;   Nr   r$   rS   rB   r   expand_dims)r   rU   r:   r;   r   
codepointss         r4   unicode_splitrb   &  se    L ~~dNUG4 + !15uNJ$$Z4&)	++ + +s   4AA z"strings.unicode_split_with_offsetsc           	          t        j                  |d| g      5  t        | |||dd      \  }}t        t	        j
                  |d      |||      }||fcddd       S # 1 sw Y   yxY w)a!  Splits each string into a sequence of code points with start offsets.

  This op is similar to `tf.strings.decode(...)`, but it also returns the
  start offset for each character in its respective string.  This information
  can be used to align the characters with the original byte sequence.

  Returns a tuple `(chars, start_offsets)` where:

  * `chars[i1...iN, j]` is the substring of `input[i1...iN]` that encodes its
    `j`th character, when decoded using `input_encoding`.
  * `start_offsets[i1...iN, j]` is the start byte offset for the `j`th
    character in `input[i1...iN]`, when decoded using `input_encoding`.

  Args:
    input: An `N` dimensional potentially ragged `string` tensor with shape
      `[D1...DN]`.  `N` must be statically known.
    input_encoding: String name for the unicode encoding that should be used to
      decode each string.
    errors: Specifies the response when an input string can't be converted
      using the indicated encoding. One of:
      * `'strict'`: Raise an exception for any illegal substrings.
      * `'replace'`: Replace illegal substrings with `replacement_char`.
      * `'ignore'`: Skip illegal substrings.
    replacement_char: The replacement codepoint to be used in place of invalid
      substrings in `input` when `errors='replace'`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `N+1` dimensional tensors `(codepoints, start_offsets)`.

    * `codepoints` is an `int32` tensor with shape `[D1...DN, (num_chars)]`.
    * `offsets` is an `int64` tensor with shape `[D1...DN, (num_chars)]`.

    The returned tensors are `tf.Tensor`s if `input` is a scalar, or
    `tf.RaggedTensor`s otherwise.

  #### Example:

  >>> input = [s.encode('utf8') for s in (u'G\xf6\xf6dnight', u'\U0001f60a')]
  >>> result = tf.strings.unicode_split_with_offsets(input, 'UTF-8')
  >>> result[0].to_list()  # character substrings
  [[b'G', b'\xc3\xb6', b'\xc3\xb6', b'd', b'n', b'i', b'g', b'h', b't'],
   [b'\xf0\x9f\x98\x8a']]
  >>> result[1].to_list()  # offsets
  [[0, 1, 3, 5, 6, 7, 8, 9, 10], [0]]

  UnicodeSplitWithOffsetsFTrP   r=   r^   Nr_   )r   rU   r:   r;   r   ra   offsetscharss           r4   unicode_split_with_offsetsrg   V  sx    l ~~d5w? 	)%*:E7;=J $$Z4&)	+E
 '>	 	 	s   ;AA'c                 ,   t        j                  | d      } | j                  j                  }|t	        d      |dkD  rt        j
                  |       s%t         j                  j                  | |dz
        } n\| j                  |dz
  k  rJ| j                  t         j                  j                  | j                  || j                  z
  dz
              } t        j
                  |       r"t        j                  | j                  dg      }nt        j                  | dg      }|rt        j                  }nt        j                  } ||||||      }	|dk(  r|	j                   }
|r|	j"                  }nt         j                  j%                  |	j                   |	j&                  d	
      }
|dkD  r| j                  |
      }
|rLt         j                  j%                  |	j"                  |	j&                  d	
      }|dkD  r| j                  |      }|r|
fS |
S )z2Decodes each string into a sequence of codepoints.r   r   z)Rank of `input` must be statically known.r   rC   r=   )r   rU   r:   r;   rV   r   Fr@   )r   r%   r+   r,   r-   rA   r'   r1   rC   r(   r*   r   rF   r   r[   rW   char_valueschar_to_byte_startsrG   rE   )r   rU   r:   r;   rV   rQ   input_ndims
flat_input	decode_opflat_resultra   re   s               r4   rS   rS     s     
:
:5w
O%!!+
@
AA1_""5)((44
[1_ 5 .e			[1_	,$$

$
$
0
0%(9(99A= 1 ?@e U#""5#4#4rd;J""52$/J::I--I#'!;=+ A((J//g++;;!7!7% < IJQ))*5j**::

)
);+A+A ; g 
q((1w    zstrings.split)v1c                    t        j                  |d| g      5  t        j                  | t        j
                  d      } t        | t        j                        r/| j                  t        | j                  ||            cddd       S | j                  j                  }|dk(  r-t        t        j                  | g      ||      d   cddd       S |dk(  s|lt        j                  | ||      }t        j                  j!                  |j"                  |j$                  dddf   |j&                  d   d	      cddd       S t        t        j                  j)                  |       ||      cddd       S # 1 sw Y   yxY w)
a  Split elements of `input` based on `sep` into a `RaggedTensor`.

  Let N be the size of `input` (typically N will be the batch size). Split each
  element of `input` based on `sep` and return a `RaggedTensor` containing the
  split tokens. Empty tokens are ignored.

  Example:

  >>> tf.strings.split('hello world').numpy()
   array([b'hello', b'world'], dtype=object)
  >>> tf.strings.split(['hello world', 'a b c'])
  <tf.RaggedTensor [[b'hello', b'world'], [b'a', b'b', b'c']]>

  If `sep` is given, consecutive delimiters are not grouped together and are
  deemed to delimit empty strings. For example, `input` of `"1<>2<><>3"` and
  `sep` of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
  string, consecutive whitespace are regarded as a single separator, and the
  result will contain no empty strings at the start or end if the string has
  leading or trailing whitespace.

  Note that the above mentioned behavior matches python's str.split.

  Args:
    input: A string `Tensor` of rank `N`, the strings to split.  If
      `rank(input)` is not known statically, then it is assumed to be `1`.
    sep: `0-D` string `Tensor`, the delimiter string.
    maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the result.
    name: A name for the operation (optional).

  Raises:
    ValueError: If sep is not a string.

  Returns:
    A `RaggedTensor` of rank `N+1`, the strings split according to the
    delimiter.
  StringSplitr   dtyper   Nr   r   sepmaxsplitFr   )r   r$   r   r%   r   stringr&   r'   r(   string_split_v2r*   r+   r,   r	   r.   r   r0   r    r3   dense_shaper1   )r   rw   rx   r   r2   sparse_results         r4   rz   rz     sa   N ~~dME73 H<<V]]2E%334##
%++S(
;=	H H ;;Dqy_22E7;S(KANH H 
dl 00
S8-m''99%%$,,QT2))!,	 : H H& 

$
$
0
0
7hH'H H Hs   A&E7	>E7A)E7)E77F r/   z0delimiter is deprecated, please use sep instead.r   c                 f   t        j                  |d| g      5  t        j                  | |||      }|dk(  r|cddd       S |dk(  rTt        j
                  j                  |j                  |j                  dddf   |j                  d   d      cddd       S t        d	      # 1 sw Y   yxY w)
a   Split elements of `source` based on `delimiter`.

  Let N be the size of `source` (typically N will be the batch size). Split each
  element of `source` based on `delimiter` and return a `SparseTensor`
  or `RaggedTensor` containing the split tokens. Empty tokens are ignored.

  If `sep` is an empty string, each element of the `source` is split
  into individual strings, each containing one byte. (This includes splitting
  multibyte sequences of UTF-8.) If delimiter contains multiple bytes, it is
  treated as a set of delimiters with each considered a potential split point.

  Examples:

  >>> print(tf.compat.v1.string_split(['hello world', 'a b c']))
  SparseTensor(indices=tf.Tensor( [[0 0] [0 1] [1 0] [1 1] [1 2]], ...),
               values=tf.Tensor([b'hello' b'world' b'a' b'b' b'c'], ...),
               dense_shape=tf.Tensor([2 3], shape=(2,), dtype=int64))

  >>> print(tf.compat.v1.string_split(['hello world', 'a b c'],
  ...     result_type="RaggedTensor"))
  <tf.RaggedTensor [[b'hello', b'world'], [b'a', b'b', b'c']]>

  Args:
    source: `1-D` string `Tensor`, the strings to split.
    sep: `0-D` string `Tensor`, the delimiter character, the string should
      be length 0 or 1. Default is ' '.
    skip_empty: A `bool`. If `True`, skip the empty strings from the result.
    delimiter: deprecated alias for `sep`.
    result_type: The tensor type for the result: one of `"RaggedTensor"` or
      `"SparseTensor"`.
    name: A name for the operation (optional).

  Raises:
    ValueError: If delimiter is not a string.

  Returns:
    A `SparseTensor` or `RaggedTensor` of rank `2`, the strings split according
    to the delimiter.  The first column of the indices corresponds to the row
    in `source` and the second column corresponds to the index of the split
    component in this row.
  rs   )rw   r   r   SparseTensorNr'   r   Fr   5result_type must be 'RaggedTensor' or 'SparseTensor'.)r   r$   r   r/   r   r'   r0   r    r3   r{   r-   )sourcerw   r   r   result_typer   r|   s          r4   r/   r/     s    ` ~~dMF84 P++CJ)EMn$	P P
 
	&''99%%$,,QT2))!,	 : P P NOOP Ps    B'AB'B''B0c                 <   t        j                  d| d|      } t        j                  |d| g      5  t	        j
                  | t        j                  d      } | j                  j                  dk(  rt        j                  | d      } |dk(  r_| j                  j                  dk(  r!t        j                  | ||      cd	d	d	       S t        | ||      j                         cd	d	d	       S |d
k(  rt        | ||      cd	d	d	       S t        d      # 1 sw Y   y	xY w)aD  Split elements of `input` based on `sep`.

  Let N be the size of `input` (typically N will be the batch size). Split each
  element of `input` based on `sep` and return a `SparseTensor` or
  `RaggedTensor` containing the split tokens. Empty tokens are ignored.

  Examples:

  >>> print(tf.compat.v1.strings.split(['hello world', 'a b c']))
  SparseTensor(indices=tf.Tensor( [[0 0] [0 1] [1 0] [1 1] [1 2]], ...),
               values=tf.Tensor([b'hello' b'world' b'a' b'b' b'c'], ...),
               dense_shape=tf.Tensor([2 3], shape=(2,), dtype=int64))

  >>> print(tf.compat.v1.strings.split(['hello world', 'a b c'],
  ...     result_type="RaggedTensor"))
  <tf.RaggedTensor [[b'hello', b'world'], [b'a', b'b', b'c']]>

  If `sep` is given, consecutive delimiters are not grouped together and are
  deemed to delimit empty strings. For example, `input` of `"1<>2<><>3"` and
  `sep` of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
  string, consecutive whitespace are regarded as a single separator, and the
  result will contain no empty strings at the start or end if the string has
  leading or trailing whitespace.

  Note that the above mentioned behavior matches python's str.split.

  Args:
    input: A string `Tensor` of rank `N`, the strings to split.  If
      `rank(input)` is not known statically, then it is assumed to be `1`.
    sep: `0-D` string `Tensor`, the delimiter character.
    maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the result.
    result_type: The tensor type for the result: one of `"RaggedTensor"` or
      `"SparseTensor"`.
    source: alias for "input" argument.
    name: A name for the operation (optional).

  Raises:
    ValueError: If sep is not a string.

  Returns:
    A `SparseTensor` or `RaggedTensor` of rank `N+1`, the strings split
    according to the delimiter.
  r   r   rs   rt   r   r~   r   rv   Nr'   r   )r   deprecated_argument_lookupr   r$   r   r%   r   ry   r+   r2   r   r`   r   rz   	to_sparser-   )r   rw   rx   r   r   r   s         r4   strings_split_v1r   Q  s   ^ 
0
0uh(%
~~dME73 P<<V]]2E {{1##E1-en$			Q	))%S8LP P u#AKKMP P 
	&Uh?P P NOOP Ps   BDD+DDDinputsc           
      x    t        j                  t        j                  t        j                  | ||||xs d      S )z(For docs, see: _RAGGED_REDUCE_DOCSTRING.RaggedSegmentJoin)r   ragged_reduce_aggregater   reduce_joinunsorted_segment_join)r   axiskeepdims	separatorr   s        r4   r   r     s9     
	0	0j>>	46#6
8 8rp   zstrings.ngramsc                    t        j                  |d| g      5  |d}d}nt        |t        t        f      rPt        |d   t
        j                        rt        |d   t
        j                        st        d      |d   }|d   }n)t        |t
        j                        st        d      |}|}||dk  rt        d      ||t        d      t        j                  | d	t        j                  
      } d}	t        | t        j                        r2t        j                   t        j"                  |       dd dggd      }
d}	t        | t        j$                        s| j"                  j&                  t        d      | j"                  j&                  dk(  rt        d      | j"                  j&                  dk(  r@t        j$                  j)                  | dgd      }t+        |||||||      d   cddd       S t        j$                  j-                  | | j"                  j&                  dz
        } | j.                  dkD  rW| j1                  t+        | j2                  ||||||            }|	r t        j4                  |j6                  
      n|cddd       S |d}||d}t        |t        t        f      s|g}n|}|D ]  }|dk  s	t        d|z         t9        j:                  | j6                  | j<                  ||||||      \  }}t        j$                  j?                  ||d      }|	r t        j4                  |j6                  
      n|cddd       S # 1 sw Y   yxY w)a  Create a tensor of n-grams based on `data`.

  Creates a tensor of n-grams based on `data`. The n-grams are created by
  joining windows of `width` adjacent strings from the inner axis of `data`
  using `separator`.

  The input data can be padded on both the start and end of the sequence, if
  desired, using the `pad_values` argument. If set, `pad_values` should contain
  either a tuple of strings or a single string; the 0th element of the tuple
  will be used to pad the left side of the sequence and the 1st element of the
  tuple will be used to pad the right side of the sequence. The `padding_width`
  arg controls how many padding values are added to each side; it defaults to
  `ngram_width-1`.

  If this op is configured to not have padding, or if it is configured to add
  padding with `padding_width` set to less than ngram_width-1, it is possible
  that a sequence, or a sequence plus padding, is smaller than the ngram
  width. In that case, no ngrams will be generated for that sequence. This can
  be prevented by setting `preserve_short_sequences`, which will cause the op
  to always generate at least one ngram per non-empty sequence.

  Examples:

  >>> tf.strings.ngrams(["A", "B", "C", "D"], 2).numpy()
  array([b'A B', b'B C', b'C D'], dtype=object)
  >>> tf.strings.ngrams(["TF", "and", "keras"], 1).numpy()
  array([b'TF', b'and', b'keras'], dtype=object)

  Args:
    data: A Tensor or RaggedTensor containing the source data for the ngrams.
    ngram_width: The width(s) of the ngrams to create. If this is a list or
      tuple, the op will return ngrams of all specified arities in list order.
      Values must be non-Tensor integers greater than 0.
    separator: The separator string used between ngram elements. Must be a
      string constant, not a Tensor.
    pad_values: A tuple of (left_pad_value, right_pad_value), a single string,
      or None. If None, no padding will be added; if a single string, then that
      string will be used for both left and right padding. Values must be Python
      strings.
    padding_width: If set, `padding_width` pad values will be added to both
      sides of each sequence. Defaults to `ngram_width`-1. Must be greater than
      0. (Note that 1-grams are never padded, regardless of this value.)
    preserve_short_sequences: If true, then ensure that at least one ngram is
      generated for each input sequence.  In particular, if an input sequence is
      shorter than `min(ngram_width) + 2*pad_width`, then generate a single
      ngram containing the entire sequence.  If false, then no ngrams are
      generated for these short input sequences.
    name: The op name.

  Returns:
    A RaggedTensor of ngrams. If `data.shape=[D1...DN, S]`, then
    `output.shape=[D1...DN, NUM_NGRAMS]`, where
    `NUM_NGRAMS=S-ngram_width+1+2*padding_width`.

  Raises:
    TypeError: if `pad_values` is set to an invalid type.
    ValueError: if `pad_values`, `padding_width`, or `ngram_width` is set to an
      invalid value.
  StringNGramsNr   r   r   z7pad_values must be a string, tuple of strings, or None.z%padding_width must be greater than 0.z4pad_values must be provided if padding_width is set.data)r   ru   Fr=   r   TzRank of data must be known.zData must have rank>0r@   ri   z/All ngram_widths must be greater than 0. Got %s)r   data_splitsr   ngram_widthsleft_pad	right_pad	pad_widthpreserve_short_sequences)r    rE   r#   ) r   r$   r&   listtupleutil_compatbytes_or_text_types	TypeErrorr-   r   r%   r   ry   
tensor_libTensorr   concatr+   r'   r,   from_row_startsngramsr1   rC   rD   r    rF   r*   r   string_n_gramsrE   rG   )r   ngram_widthr   
pad_valuespadding_widthr   r   r   r   	to_tensorr{   rtoutputr   widthoutput_splitss                   r4   r   r     s   J ~~dNTF3 SChi	Ju	.A(G(GHA(G(GHEG 	GAhQ-i
K$C$CDEG 	Ghi ]Q%6>?? Z%7MNN;;60D I$
))*$$iood&;CR&@2$%GaPkidM667				!677::q 011::q ''771# 8 'b+y*m.6679SSC SCX ))55djj..2 6 4 !
k9j-)412f 09 v11*,>DiSC SCl m-"7mkD%=1!]l l &	J$% & 	&&
 +99OO!!9;FM ''77-% 8 AF .7 V//(*<BgSC SC SCs    GM=BM0MBMM'templatec           	      .   t        j                  |      st        j                  |      r|g}| j	                  |      }t        |      t        |      dz
  k7  r0t        dj                  t        |      dz
  t        |                  t        j                  |d|g      5  t        j                  |d         g}t        |      D ]  \  }}t        j                  |      r|j                  t        ||             n(|j                  t        j                   d|g|             |j                  t        j                  ||dz                    t        |      dk(  r|d   cddd       S t        j"                  |      cddd       S # 1 sw Y   yxY w)z8Version of tf.strings.format that handles RaggedTensors.r   z@num placeholders in template and num inputs must match: {} vs {}StringFormatr   {})	summarizeN)r   
is_tf_typer   rA   splitlenr-   formatr   r$   r   constant	enumerateappendragged_tensor_to_stringr   string_formatr   )	r   r   placeholderr   r   split_templateoutput_piecesir   s	            r4   r   r   ;  sn    F#}'>'>v'FXF>>+..[C'!++
 ""(&^)<q)@#f+"NP P ~~dNVH5 3 )).*;<=Mf% H5		 	 	'4UIFGZ555'Y0 	1;//q1u0EFGH =Q13 3 ##M23 3 3s   !CF-FFc                 >   |(|dk7  r#t        |t              r|dkD  st        d|z        t        j                  dd| g      5  t        j                  |       } | j                  j                  t        d      | j                  t        j                  k(  r9t        j                  | j                  dd      }| j                  d	|z   d	z         }n.| j                  t        j                   | j                              }t#        ||      cddd       S # 1 sw Y   yxY w)
a  Returns a scalar string tensor with the contents of a RaggedTensor.

  Requires that `rt.shape.rank` is not `None`.

  Note: this converts the entire `RaggedTensor` into a single string scalar.
  If you want to convert individual elements, use `tf.strings.as_string(rt)`.

  >>> rt1 = tf.ragged.constant([[1, 2, 3], [4, 5]])
  >>> ragged_tensor_to_string(rt1).numpy()
  b'[[1, 2, 3], [4, 5]]'

  >>> rt2 = tf.ragged.constant([[['a'], ['b', 'c']], [['d', 'e', 'f'], []]])
  >>> ragged_tensor_to_string(rt2).numpy()
  b"[[['a'], ['b', 'c']], [['d', 'e', 'f'], []]]"

  >>> rt3 = tf.ragged.constant([[1], [2, 3, 4, 5, 6], [], [], [7], [8, 9]])
  >>> ragged_tensor_to_string(rt3, summarize=2).numpy()
  b'[[1], [2, 3, ..., 5, 6], ..., [7], [8, 9]]'

  Args:
    rt: The RaggedTensor that should be converted to a string.
    summarize: If specified, then only the first and last `summarize` elements
      within each dimension are included in the string. If `-1` or `None`, then
      all elements are included.
  Nr=   r   z5Expected summarize to be -1 or a positive int, got %rAsStringz?RaggedTensor to_string requires that rt.shape.rank is not None.z(['\\])z\\\1')r&   intr-   r   r$   r   r%   r+   r2   ru   r   ry   r   regex_replacer*   r(   	as_string_ragged_tensor_to_string)r   r   escapedstr_ts       r4   r   r   [  s    4 	Ri%)a-
L    
~~dJ- 6		9	9"	=B	xx}} & ' ' 
xx6== ((WMg!!#-#"56e!!*"6"6r~~"FGe#E956 6 6s   CDDc                 T   | j                   j                  dk(  r| n=t        j                  fd| t	        j
                  dt        j                              dvr-t        j                  t        |       dz  k  fdfd      d	t        j                  d
      z   dz   S )aF  Returns a scalar string tensor with the contents of `string_tensor`.

  Args:
    string_tensor: A potentially ragged tensor with dtype=string.
    summarize: Include only the first and last `summarize` elements of each
      dimension.  If `-1` or `None`, then include all elements.

  Returns:
    A scalar string Tensor.
  r   c                     t        |       S N)r   )sr   s    r4   <lambda>z*_ragged_tensor_to_string.<locals>.<lambda>  s    *1i8 rp   N)fn_output_signature)r=   Nr<   c                       S r    )piecess   r4   r   z*_ragged_tensor_to_string.<locals>.<lambda>  s     rp   c                  H    t        j                   d  dg  d  gd      S )Nz...r   r   )r   r   )r   r   s   r4   r   z*_ragged_tensor_to_string.<locals>.<lambda>  s1    	  JY%&)*=> rp   [z, )r   ])r+   r2   
map_fn_libr   r   
TensorSpecr   ry   r
   _nrowsr   r   )string_tensorr   r   s    `@r4   r   r     s     "F8&11$FHF j YY}Y.	F 
z%%f=	=	CCrp   c                     t        | t        j                        r| j                  |      S t	        j
                  | |      d   S )Nr>   r   )r&   r   r'   r"   r   r+   )r   r?   s     r4   r   r     s9    223<<<**??6H5a88rp   c                     t        |       dk  rt        d      t        j                  |d|       5  t	        j
                  t        j                  | |      cddd       S # 1 sw Y   yxY w)z0RaggedTensor implementation for tf.strings.join.r   z-tf.strings.join: expected at least one input.RaggedStringJoinN)r   r-   r   r$   r   map_flat_valuesr   string_join)r   r   r   s      r4   r   r     s\    
 	[1_
D
EE
~~d.7 < 001G1G1:<< < <s   %A  A)r   )replace  N)r   r   FN)Nr=   N)NTNr~   N)NNr=   r~   NN)NNr   N) NNFN)r      N)r   N)8__doc__typingtensorflow.python.frameworkr   r   r   r   r   r   tensorflow.python.opsr   r	   r
   r   r   r   r   tensorflow.python.ops.raggedr   r   r   r   tensorflow.python.utilr   r   r   r    tensorflow.python.util.tf_exportr   add_dispatch_supportr)   rB   rW   r[   rb   rg   rS   rz   deprecated_argsr/   r   dispatch_for_apireduce_join_v2Raggedr   r   r   strUnionListRaggedOrDenser   r   rH   r   r   r   rp   r4   <module>r      s   9  3 . + < 3 + 1 & 0 6 , 9 > 8 6 8 . + 6  !	*O  "*O\ #$	 $$)	\4  %\4@ #$	 $$*.3)K  %)KX 01	 (117;@%)9J  29Jx "#	 ##)	++  $++\ /0	 '006$(	=  1=@7t ?r"	9H  #9Hx ~	TO(* ?C267P*   
7Px  	46CG>P  !>PB :445	8,, 8 68 	 $)VC  VCr :334
 	33LL--]%@%@AB C3 53>*6ZD8 #LL 9 :112<M$?$?@ < 3<rp   