"""String functions.
.. versionadded:: 1.1.0
"""
from functools import partial
import re
import pydash as pyd
from ._compat import (
html_unescape,
iteritems,
parse_qsl,
PY26,
urlencode,
urlsplit,
urlunsplit
)
# Names exported as the public API of this module. Every entry corresponds to
# a function (or function alias) defined below.
__all__ = (
'camel_case',
'capitalize',
'deburr',
'ends_with',
'ensure_ends_with',
'ensure_starts_with',
'escape',
'escape_reg_exp',
'escape_re',
'explode',
'implode',
'js_match',
'js_replace',
'kebab_case',
'pad',
'pad_left',
'pad_right',
'quote',
'repeat',
'snake_case',
'starts_with',
'surround',
'trim',
'trim_left',
'trim_right',
'trunc',
'unescape',
'url',
'words',
)
# Characters replaced by escape(), mapped to the HTML entity that replaces
# them. The values must be the entity text itself (e.g. ``&amp;``), not the
# character it represents; otherwise escape() would be a no-op.
HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
    '`': '&#96;',
}
# Replacement table consulted by deburr(): maps characters in the \xC0-\xFF
# range to their basic latin replacement. deburr() leaves any matched
# character that has no entry here unchanged.
DEBURRED_LETTERS = {
# A / a variants
'\xC0': 'A',
'\xC1': 'A',
'\xC2': 'A',
'\xC3': 'A',
'\xC4': 'A',
'\xC5': 'A',
'\xE0': 'a',
'\xE1': 'a',
'\xE2': 'a',
'\xE3': 'a',
'\xE4': 'a',
'\xE5': 'a',
# C / c and D / d variants
'\xC7': 'C',
'\xE7': 'c',
'\xD0': 'D',
'\xF0': 'd',
# E / e variants
'\xC8': 'E',
'\xC9': 'E',
'\xCA': 'E',
'\xCB': 'E',
'\xE8': 'e',
'\xE9': 'e',
'\xEA': 'e',
'\xEB': 'e',
# I / i variants
'\xCC': 'I',
'\xCD': 'I',
'\xCE': 'I',
'\xCF': 'I',
'\xEC': 'i',
'\xED': 'i',
'\xEE': 'i',
'\xEF': 'i',
# N / n variants
'\xD1': 'N',
'\xF1': 'n',
# O / o variants
'\xD2': 'O',
'\xD3': 'O',
'\xD4': 'O',
'\xD5': 'O',
'\xD6': 'O',
'\xD8': 'O',
'\xF2': 'o',
'\xF3': 'o',
'\xF4': 'o',
'\xF5': 'o',
'\xF6': 'o',
'\xF8': 'o',
# U / u variants
'\xD9': 'U',
'\xDA': 'U',
'\xDB': 'U',
'\xDC': 'U',
'\xF9': 'u',
'\xFA': 'u',
'\xFB': 'u',
'\xFC': 'u',
# Y / y variants
'\xDD': 'Y',
'\xFD': 'y',
'\xFF': 'y',
# Characters that expand to two letters
'\xC6': 'Ae',
'\xE6': 'ae',
'\xDE': 'Th',
'\xFE': 'th',
'\xDF': 'ss',
# \xD7 and \xF7 are replaced by a single space
'\xD7': ' ',
'\xF7': ' '
}
# Use Javascript style regex to make Lo-Dash compatibility easier.
# These strings use the ``/pattern/options`` form that js_to_py_re_find() and
# js_to_py_re_replace() split apart; the ``g`` option requests all matches.
# Pattern used by words() to pull the individual words out of a string.
RE_WORDS = '/[A-Z]{2,}(?=[A-Z][a-z]+[0-9]*)|[A-Z]?[a-z]+[0-9]*|[A-Z]+|[0-9]+/g'
# Pattern used by deburr(): any single character in the range \xC0-\xFF.
RE_LATIN1 = '/[\xC0-\xFF]/g'
def camel_case(text):
    """Converts `text` to camel case.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to camel case. When `text` contains no words
            (for example the empty string) the result is ``''``.

    .. versionadded:: 1.1.0
    """
    text = ''.join(word.title() for word in words(pyd.to_string(text)))
    # Slice rather than index so that an empty result does not raise
    # IndexError.
    return text[:1].lower() + text[1:]
def capitalize(text):
    """Return `text` with its first character capitalized.

    This defers to the ``capitalize()`` method of `text`.

    Args:
        text (str): String to capitalize.

    Returns:
        str: Capitalized string.

    .. versionadded:: 1.1.0
    """
    capitalized = text.capitalize()
    return capitalized
def deburr(text):
    """Replace latin-1 supplementary letters in `text` with basic latin
    letters.

    Args:
        text (str): String to deburr.

    Returns:
        str: Deburred string.

    .. versionadded:: 2.0.0
    """
    def replace_letter(match):
        # Characters without a table entry are kept as they are.
        letter = match.group()
        return DEBURRED_LETTERS.get(letter, letter)

    return js_replace(RE_LATIN1, text, replace_letter)
def ends_with(text, target, position=None):
    """Test whether `text` ends with `target`.

    Args:
        text (str): String to check.
        target (str): String to check for.
        position (int, optional): Position to search from; only
            ``text[:position]`` is examined. Defaults to the end of `text`.

    Returns:
        bool: Whether `text` ends with `target`.

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)
    end = len(text) if position is None else position
    return text[:end].endswith(target)
def ensure_ends_with(text, suffix):
    """Append `suffix` to `text` unless `text` already ends with it.

    Args:
        text (str): Source string to append `suffix` to. Must not be
            ``None``.
        suffix (str): String to append to the source string if the source
            string does not end with `suffix`. Must not be ``None``.

    Returns:
        str: Source string, possibly extended by `suffix`.

    .. versionadded:: 2.4.0
    """
    if text.endswith(suffix):
        return text
    return '{0}{1}'.format(text, suffix)
def ensure_starts_with(text, prefix):
    """Prepend `prefix` to `text` unless `text` already starts with it.

    Args:
        text (str): Source string to prepend `prefix` to. Must not be
            ``None``.
        prefix (str): String to prepend to the source string if the source
            string does not start with `prefix`. Must not be ``None``.

    Returns:
        str: Source string, possibly prefixed by `prefix`.

    .. versionadded:: 2.4.0
    """
    if text.startswith(prefix):
        return text
    return '{1}{0}'.format(text, prefix)
def escape(text):
    r"""Replaces the characters ``&``, ``<``, ``>``, ``"``, ``'``, and ``\```
    in `text` with their corresponding HTML entities.

    Args:
        text (str): String to escape.

    Returns:
        str: HTML escaped string.

    .. versionadded:: 1.0.0

    .. versionchanged:: 1.1.0
        Moved function to Strings module.
    """
    # NOTE: Not using _compat.html_escape because Lo-Dash escapes certain chars
    # differently (e.g. "'" isn't escaped by html_escape() but is by Lo-Dash).
    escaped = [HTML_ESCAPES.get(char, char) for char in pyd.to_string(text)]
    return ''.join(escaped)
def escape_reg_exp(text):
    """Escape the regular expression special characters in `text`.

    Args:
        text (str): String to escape.

    Returns:
        str: RegExp escaped string, as produced by :func:`re.escape`.

    .. versionadded:: 1.1.0
    """
    escaped = re.escape(text)
    return escaped


# Shorter alias for escape_reg_exp().
escape_re = escape_reg_exp
def explode(text, delimiter=None):
    """Split `text` on `delimiter`. When `delimiter` is ``None`` (or
    otherwise falsy, e.g. ``''``), `text` is split on every character.

    Args:
        text (str): String to explode.
        delimiter (str, optional): Delimiter string to split on. Defaults to
            ``None``.

    Returns:
        list: Exploded string.

    .. versionadded:: 2.0.0
    """
    if not delimiter:
        # No usable delimiter: break the text into its individual characters.
        return list(text)
    return text.split(delimiter)
def implode(array, delimiter=''):
    """Join the elements of an iterable into one string, placing `delimiter`
    between each element.

    Args:
        array (iterable): Iterable to implode.
        delimiter (str): Delimiter to use when joining. Defaults to ``''``.

    Returns:
        str: Imploded iterable.

    .. versionadded:: 2.0.0
    """
    joined = delimiter.join(array)
    return joined
def js_match(reg_exp, text):
    """Find matches in `text` using a Javascript style regular expression.

    Args:
        reg_exp (str): Javascript style regular expression, e.g.
            ``'/[a-z]+/g'``.
        text (str): String to evaluate.

    Returns:
        list: List of matches.

    .. versionadded:: 2.0.0
    """
    find = js_to_py_re_find(reg_exp)
    return find(text)
def js_replace(reg_exp, text, repl):
    """Substitute `repl` for the matches that a Javascript style regular
    expression finds in `text`.

    Args:
        reg_exp (str): Javascript style regular expression.
        text (str): String to evaluate.
        repl (str|callable): Replacement; passed through to the ``re``
            module's substitution, so a callable receiving the match object
            also works.

    Returns:
        str: Modified string.

    .. versionadded:: 2.0.0
    """
    replace = js_to_py_re_replace(reg_exp)
    return replace(text, repl)
def kebab_case(text):
    """Convert `text` to kebab case (a.k.a. spinal case): lower-cased words
    joined by ``-``.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to kebab case.

    .. versionadded:: 1.1.0
    """
    lowered = [word.lower() for word in words(pyd.to_string(text)) if word]
    return '-'.join(lowered)
def pad(text, length, chars=' '):
    """Pads `text` on the left and right sides if it is shorter than the
    given padding length. The `chars` string is repeated as needed and
    truncated on each side so that the result is exactly `length` characters
    long. When the padding can't be split evenly, the extra character goes on
    the right.

    Args:
        text (str): String to pad.
        length (int): Total length to pad to. If `text` is already at least
            this long it is returned unchanged.
        chars (str, optional): Chars to pad with. Defaults to ``" "``. An
            empty string results in no padding.

    Returns:
        str: Padded string.

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)
    padding = max(length - len(text), 0)
    left_pad = padding // 2
    right_pad = padding - left_pad

    # repeat() produces ``n * len(chars)`` characters, which is more than
    # needed whenever `chars` has several characters. Trim each side to the
    # exact number of padding characters so the total never exceeds `length`
    # and each side starts at the beginning of the `chars` pattern.
    left = repeat(chars, left_pad)[:left_pad]
    right = repeat(chars, right_pad)[:right_pad]

    return left + text + right
def pad_left(text, length, chars=' '):
    """Pad `text` on the left side when it is shorter than the given padding
    length. The repeated `chars` string may be truncated if the number of
    padding characters can't be evenly divided by the padding length.

    Args:
        text (str): String to pad.
        length (int): Amount to pad.
        chars (str, optional): Chars to pad with. Defaults to ``" "``.

    Returns:
        str: Padded string.

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)
    width = max((length, len(text)))
    # Build more padding than necessary, then keep the rightmost `width`
    # characters.
    padded = repeat(chars, width) + text
    return padded[-width:]
def pad_right(text, length, chars=' '):
    """Pad `text` on the right side when it is shorter than the given padding
    length. The repeated `chars` string may be truncated if the number of
    padding characters can't be evenly divided by the padding length.

    Args:
        text (str): String to pad.
        length (int): Amount to pad.
        chars (str, optional): Chars to pad with. Defaults to ``" "``.

    Returns:
        str: Padded string.

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)
    width = max((length, len(text)))
    # Build more padding than necessary, then keep the leftmost `width`
    # characters.
    padded = text + repeat(chars, width)
    return padded[:width]
def quote(text, quote_char='"'):
    """Quote a string by placing `quote_char` before and after it.

    Args:
        text (str): String to be quoted.
        quote_char (str): The quote character. Defaults to a double quote.

    Returns:
        str: The quoted string.

    .. versionadded:: 2.4.0
    """
    quoted = surround(text, quote_char)
    return quoted
def repeat(text, n=0):
    """Repeat the given string `n` times.

    Args:
        text (str): String to repeat.
        n (int, optional): Number of times to repeat the string. Defaults to
            ``0``.

    Returns:
        str: Repeated string.

    .. versionadded:: 1.1.0
    """
    times = int(n)
    return pyd.to_string(text) * times
def snake_case(text):
    """Convert `text` to snake case: lower-cased words joined by ``_``.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to snake case.

    .. versionadded:: 1.1.0
    """
    lowered = [word.lower() for word in words(pyd.to_string(text)) if word]
    return '_'.join(lowered)
def starts_with(text, target, position=None):
    """Test whether `text` starts with `target`.

    Args:
        text (str): String to check.
        target (str): String to check for.
        position (int, optional): Position to search from; only
            ``text[position:]`` is examined. Defaults to the beginning of
            `text`.

    Returns:
        bool: Whether `text` starts with `target`.

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)
    start = 0 if position is None else position
    return text[start:].startswith(target)
def surround(text, wrapper):
    """Surround a string with another string.

    Args:
        text (str): String to surround with `wrapper`.
        wrapper (str): String placed both before and after `text`.

    Returns:
        str: Surrounded string.

    .. versionadded:: 2.4.0
    """
    template = '{1}{0}{1}'
    return template.format(text, wrapper)
def trim(text, chars=None):
    """Strip leading and trailing whitespace, or the specified characters,
    from `text`.

    Args:
        text (str): String to trim.
        chars (str, optional): Specific characters to remove.

    Returns:
        str: Trimmed string.

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)
    return text.strip(chars)
def trim_left(text, chars=None):
    """Strip leading whitespace, or the specified characters, from `text`.

    Args:
        text (str): String to trim.
        chars (str, optional): Specific characters to remove.

    Returns:
        str: Trimmed string.

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)
    return text.lstrip(chars)
def trim_right(text, chars=None):
    """Strip trailing whitespace, or the specified characters, from `text`.

    Args:
        text (str): String to trim.
        chars (str, optional): Specific characters to remove.

    Returns:
        str: Trimmed string.

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)
    return text.rstrip(chars)
def trunc(text, length=30, omission='...', separator=None):
    """Truncates `text` if it is longer than the given maximum string length.
    The last characters of the truncated string are replaced with the omission
    string which defaults to ``...``.

    Args:
        text (str): String to truncate.
        length (int, optional): Maximum string length. Defaults to ``30``.
        omission (str, optional): String to indicate text is omitted.
        separator (mixed, optional): Separator pattern to truncate to. May be
            a string or a compiled regular expression; the text is cut at the
            last occurrence of the separator within the truncated text. If
            the separator does not occur, the text is cut at the length
            limit.

    Returns:
        str: Truncated string. `text` is returned unchanged (as a string)
            when it already fits within `length`. When `length` leaves no
            room for any text before `omission`, only `omission` is returned.

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)

    if len(text) <= length:
        # Nothing to truncate, so no omission marker is needed.
        return text

    text_len = length - len(omission)

    if text_len < 1:
        # No room for any of the text; a negative slice bound would otherwise
        # keep most of the text instead of shortening it.
        return omission

    text = text[:text_len]
    trunc_len = len(text)

    if pyd.is_string(separator):
        found = text.rfind(separator)
        # rfind() returns -1 when the separator is absent; using that as a
        # slice bound would silently drop the last character.
        if found >= 0:
            trunc_len = found
    elif pyd.is_re(separator):
        last = None
        for match in separator.finditer(text):
            last = match

        if last is not None:
            trunc_len = last.start()

    return text[:trunc_len] + omission
def unescape(text):
    """The inverse of :func:`escape`. Converts the HTML entities ``&amp;``,
    ``&lt;``, ``&gt;``, ``&quot;``, ``&#39;``, and ``&#96;`` in `text` to
    their corresponding characters.

    Args:
        text (str): String to unescape.

    Returns:
        str: HTML unescaped string.

    .. versionadded:: 1.0.0

    .. versionchanged:: 1.1.0
        Moved to Strings module.
    """
    # The conversion itself is delegated to the compatibility helper.
    unescaped = html_unescape(text)
    return unescaped
def url(*paths, **params):
    """Combine a series of URL paths into a single URL. Optionally, pass in
    keyword arguments to append query parameters.

    Query strings found on the individual `paths` are collected and appended
    after the keyword `params` in the final query string.

    Args:
        paths (str): URL paths to combine.

    Keyword Args:
        params (str, optional): Query parameters.

    Returns:
        str: URL string.

    .. versionadded:: 2.2.0
    """
    query_pairs = flatten_url_params(params)
    stripped_paths = []

    for raw_path in paths:
        scheme, netloc, path, query, fragment = urlsplit(raw_path)
        # Move the query string into the shared list and keep the rest of the
        # URL without it.
        query_pairs += parse_qsl(query)
        stripped_paths.append(urlunsplit((scheme, netloc, path, '', fragment)))

    joined = delimitedpathjoin('/', *stripped_paths)
    scheme, netloc, path, _, fragment = urlsplit(joined)

    return urlunsplit((scheme, netloc, path, urlencode(query_pairs), fragment))
def words(text):
    """Split `text` into the list of words it contains, as matched by
    ``RE_WORDS``.

    Args:
        text (str): String to split.

    Returns:
        list: List of words.

    .. versionadded:: 2.0.0
    """
    found = js_match(RE_WORDS, text)
    return found
#
# Utility functions that are not part of the main API.
#
def js_to_py_re_find(reg_exp):
    """Return Python regular expression matching function based on Javascript
    style regexp.

    Args:
        reg_exp (str): Javascript style regular expression of the form
            ``/pattern/options``. Recognized options are ``g`` (return every
            match instead of only the first) and ``i`` (ignore case).

    Returns:
        function: ``find(text)`` returning a list of the matched substrings;
            the list is empty when nothing matches.
    """
    # Drop the leading "/" and split the options off at the last "/".
    pattern, options = reg_exp[1:].rsplit('/', 1)
    flags = re.I if 'i' in options else 0
    find_all = 'g' in options
    regex = re.compile(pattern, flags)

    def find(text):  # pylint: disable=missing-docstring
        if find_all:
            # Use finditer() + group() rather than findall(): findall()
            # returns the capture groups instead of the whole match whenever
            # the pattern contains groups.
            return [match.group() for match in regex.finditer(text)]

        match = regex.search(text)
        return [match.group()] if match else []

    return find
def js_to_py_re_replace(reg_exp):
    """Return Python regular expression substitution function based on
    Javascript style regexp.

    Args:
        reg_exp (str): Javascript style regular expression of the form
            ``/pattern/options``. Recognized options are ``g`` (replace every
            match instead of only the first) and ``i`` (ignore case).

    Returns:
        function: ``replace(text, repl)`` returning `text` with matches
            replaced by `repl`, which may be a string or a callable taking
            the match object.
    """
    # Drop the leading "/" and split the options off at the last "/".
    pattern, options = reg_exp[1:].rsplit('/', 1)
    # A count of 0 means "replace all occurrences".
    count = 0 if 'g' in options else 1
    flags = re.I if 'i' in options else 0
    # A compiled pattern's sub() behaves the same on every supported Python
    # version, so no version-specific branch is needed.
    regex = re.compile(pattern, flags)

    def replace(text, repl):  # pylint: disable=missing-docstring
        return regex.sub(repl, text, count=count)

    return replace
def delimitedpathjoin(delimiter, *paths):
    """Join path pieces with `delimiter`, collapsing duplicate delimiters at
    the seams while keeping a leading/trailing delimiter found on the first/
    last piece. Falsy pieces are ignored and nested lists are flattened.

    >>> assert delimitedpathjoin('.', '') == ''
    >>> assert delimitedpathjoin('.', '.') == '.'
    >>> assert delimitedpathjoin('.', ['', '.a']) == '.a'
    >>> assert delimitedpathjoin('.', ['a', '.']) == 'a.'
    >>> assert delimitedpathjoin('.', ['', '.a', '', '', 'b']) == '.a.b'
    >>> ret = '.a.b.c.d.e.'
    >>> assert delimitedpathjoin('.', ['.a.', 'b.', '.c', 'd', 'e.']) == ret
    >>> assert delimitedpathjoin('.', ['a', 'b', 'c']) == 'a.b.c'
    >>> ret = 'a.b.c.d.e.f'
    >>> assert delimitedpathjoin('.', ['a.b', '.c.d.', '.e.f']) == ret
    >>> ret = '.a.b.c.1.'
    >>> assert delimitedpathjoin('.', '.', 'a', 'b', 'c', 1, '.') == ret
    >>> assert delimitedpathjoin('.', []) == ''
    """
    parts = [pyd.to_string(item) for item in pyd.flatten_deep(paths) if item]

    if len(parts) == 1:
        # A lone piece is returned untouched; joining it would add an extra
        # delimiter when the piece is the delimiter itself.
        return parts[0]

    leading = delimiter if parts and parts[0].startswith(delimiter) else ''
    trailing = delimiter if parts and parts[-1].endswith(delimiter) else ''

    # Strip delimiters off every piece and skip the ones left empty.
    stripped = [part.strip(delimiter) for part in parts]
    middle = delimiter.join([piece for piece in stripped if piece])

    return ''.join([leading, middle, trailing])
def flatten_url_params(params):
    """Flatten URL params into a list of ``(key, value)`` tuples. A param
    whose value is a list or tuple is expanded into one tuple per element.

    >>> params = [('a', 1), ('a', [2, 3])]
    >>> assert flatten_url_params(params) == [('a', 1), ('a', 2), ('a', 3)]
    >>> params = {'a': [1, 2, 3]}
    >>> assert flatten_url_params(params) == [('a', 1), ('a', 2), ('a', 3)]
    """
    pairs = list(iteritems(params)) if isinstance(params, dict) else params
    flattened = []

    for key, value in pairs:
        if not isinstance(value, (list, tuple)):
            flattened.append((key, value))
            continue

        # Pair the key with each element of the sequence.
        flattened.extend((key, item) for item in value)

    return flattened