"""
String functions.
.. versionadded:: 1.1.0
"""
import html
import math
import re
import typing
import typing as t
import unicodedata
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
import pydash as pyd
from .helpers import UNSET, Unset
from .types import NumberT
__all__ = (
"camel_case",
"capitalize",
"chop",
"chop_right",
"chars",
"clean",
"count_substr",
"deburr",
"decapitalize",
"ends_with",
"ensure_ends_with",
"ensure_starts_with",
"escape",
"escape_reg_exp",
"has_substr",
"human_case",
"insert_substr",
"join",
"kebab_case",
"lines",
"lower_case",
"lower_first",
"number_format",
"pad",
"pad_end",
"pad_start",
"pascal_case",
"predecessor",
"prune",
"quote",
"reg_exp_js_match",
"reg_exp_js_replace",
"reg_exp_replace",
"repeat",
"replace",
"replace_end",
"replace_start",
"separator_case",
"series_phrase",
"series_phrase_serial",
"slugify",
"snake_case",
"split",
"start_case",
"starts_with",
"strip_tags",
"substr_left",
"substr_left_end",
"substr_right",
"substr_right_end",
"successor",
"surround",
"swap_case",
"title_case",
"to_lower",
"to_upper",
"trim",
"trim_end",
"trim_start",
"truncate",
"unescape",
"unquote",
"upper_case",
"upper_first",
"url",
"words",
)
T = t.TypeVar("T")
T2 = t.TypeVar("T2")
class JSRegExp:
"""
Javascript-style regular expression pattern.
Converts a Javascript-style regular expression to the equivalent Python version.
"""
def __init__(self, reg_exp: str) -> None:
pattern, options = reg_exp[1:].rsplit("/", 1)
self._global = "g" in options
self._ignore_case = "i" in options
flags = re.I if self._ignore_case else 0
self.pattern = re.compile(pattern, flags=flags)
def find(self, text: str) -> t.List[str]:
"""Return list of regular expression matches."""
if self._global:
results = self.pattern.findall(text)
else:
res = self.pattern.search(text)
if res:
results = [res.group()]
else:
results = []
return results
def replace(self, text: str, repl: t.Union[str, t.Callable[[re.Match], str]]) -> str:
"""Replace parts of text that match the regular expression."""
count = 0 if self._global else 1
return self.pattern.sub(repl, text, count=count)
HTML_ESCAPES = {"&": "&", "<": "<", ">": ">", '"': """, "'": "'", "`": "`"}
DEBURRED_LETTERS = {
"\xc0": "A",
"\xc1": "A",
"\xc2": "A",
"\xc3": "A",
"\xc4": "A",
"\xc5": "A",
"\xe0": "a",
"\xe1": "a",
"\xe2": "a",
"\xe3": "a",
"\xe4": "a",
"\xe5": "a",
"\xc7": "C",
"\xe7": "c",
"\xd0": "D",
"\xf0": "d",
"\xc8": "E",
"\xc9": "E",
"\xca": "E",
"\xcb": "E",
"\xe8": "e",
"\xe9": "e",
"\xea": "e",
"\xeb": "e",
"\xcc": "I",
"\xcd": "I",
"\xce": "I",
"\xcf": "I",
"\xec": "i",
"\xed": "i",
"\xee": "i",
"\xef": "i",
"\xd1": "N",
"\xf1": "n",
"\xd2": "O",
"\xd3": "O",
"\xd4": "O",
"\xd5": "O",
"\xd6": "O",
"\xd8": "O",
"\xf2": "o",
"\xf3": "o",
"\xf4": "o",
"\xf5": "o",
"\xf6": "o",
"\xf8": "o",
"\xd9": "U",
"\xda": "U",
"\xdb": "U",
"\xdc": "U",
"\xf9": "u",
"\xfa": "u",
"\xfb": "u",
"\xfc": "u",
"\xdd": "Y",
"\xfd": "y",
"\xff": "y",
"\xc6": "Ae",
"\xe6": "ae",
"\xde": "Th",
"\xfe": "th",
"\xdf": "ss",
"\xd7": " ",
"\xf7": " ",
}
# Use Javascript style regex to make Lo-Dash compatibility easier.
# Lodash Regex definitions: https://github.com/lodash/lodash/blob/master/.internal/unicodeWords.js
# References: https://github.com/lodash/lodash/blob/master/words.js#L8
RS_ASCII_WORDS = "/[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g"
RS_LATIN1 = "/[\xc0-\xff]/g"
# Used to compose unicode character classes.
RS_ASTRAL_RANGE = "\\ud800-\\udfff"
RS_COMBO_MARKS_RANGE = "\\u0300-\\u036f"
RE_COMBO_HALF_MARKS_RANGE = "\\ufe20-\\ufe2f"
RS_COMBO_SYMBOLS_RANGE = "\\u20d0-\\u20ff"
RS_COMBO_MARKS_EXTENDED_RANGE = "\\u1ab0-\\u1aff"
RS_COMBO_MARKS_SUPPLEMENT_RANGE = "\\u1dc0-\\u1dff"
RS_COMBO_RANGE = (
RS_COMBO_MARKS_RANGE
+ RE_COMBO_HALF_MARKS_RANGE
+ RS_COMBO_SYMBOLS_RANGE
+ RS_COMBO_MARKS_EXTENDED_RANGE
+ RS_COMBO_MARKS_SUPPLEMENT_RANGE
)
RS_DINGBAT_RANGE = "\\u2700-\\u27bf"
RS_LOWER_RANGE = "a-z\\xdf-\\xf6\\xf8-\\xff"
RS_MATH_OP_RANGE = "\\xac\\xb1\\xd7\\xf7"
RS_NON_CHAR_RANGE = "\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf"
RS_PUNCTUATION_RANGE = "\\u2000-\\u206f"
RS_SPACE_RANGE = (
" \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\"
"u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\"
"u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000"
)
RS_UPPER_RANGE = "A-Z\\xc0-\\xd6\\xd8-\\xde"
RS_VAR_RANGE = "\\ufe0e\\ufe0f"
RS_BREAK_RANGE = RS_MATH_OP_RANGE + RS_NON_CHAR_RANGE + RS_PUNCTUATION_RANGE + RS_SPACE_RANGE
# Used to compose unicode capture groups.
RS_APOS = "['\u2019]"
RS_BREAK = f"[{RS_BREAK_RANGE}]"
RS_COMBO = f"[{RS_COMBO_RANGE}]"
RS_DIGIT = "\\d"
RS_DINGBAT = f"[{RS_DINGBAT_RANGE}]"
RS_LOWER = f"[{RS_LOWER_RANGE}]"
RS_MISC = (
f"[^{RS_ASTRAL_RANGE}{RS_BREAK_RANGE}{RS_DIGIT}"
f"{RS_DINGBAT_RANGE}{RS_LOWER_RANGE}{RS_UPPER_RANGE}]"
)
RS_FITZ = "\\ud83c[\\udffb-\\udfff]"
RS_MODIFIER = f"(?:{RS_COMBO}|{RS_FITZ})"
RS_NON_ASTRAL = f"[^{RS_ASTRAL_RANGE}]"
RS_REGIONAL = "(?:\\ud83c[\\udde6-\\uddff]){2}"
RS_SURR_PAIR = "[\\ud800-\\udbff][\\udc00-\\udfff]"
RS_UPPER = f"[{RS_UPPER_RANGE}]"
RS_ZWJ = "\\u200d"
# Used to compose unicode regexes.
RS_MISC_LOWER = f"(?:{RS_LOWER}|{RS_MISC})"
RS_MISC_UPPER = f"(?:{RS_UPPER}|{RS_MISC})"
RS_OPT_CONTR_LOWER = f"(?:{RS_APOS}(?:d|ll|m|re|s|t|ve))?"
RS_OPT_CONTR_UPPER = f"(?:{RS_APOS}(?:D|LL|M|RE|S|T|VE))?"
RE_OPT_MOD = f"{RS_MODIFIER}?"
RS_OPT_VAR = f"[{RS_VAR_RANGE}]?"
RS_OPT_JOIN = (
f"(?:{RS_ZWJ}(?:{RS_NON_ASTRAL}|{RS_REGIONAL}|{RS_SURR_PAIR}){RS_OPT_VAR}{RE_OPT_MOD})*"
)
RS_ORD_LOWER = "\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])"
RS_ORD_UPPER = "\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])"
RS_SEQ = RS_OPT_VAR + RE_OPT_MOD + RS_OPT_JOIN
RS_EMOJI = f"(?:{RS_DINGBAT}|{RS_REGIONAL}|{RS_SURR_PAIR}){RS_SEQ}"
RS_HAS_UNICODE_WORD = "[a-z][A-Z]|[A-Z]{2}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]"
RS_UNICODE_WORDS = (
f"/"
f"{RS_UPPER}?{RS_LOWER}+{RS_OPT_CONTR_LOWER}(?={RS_BREAK}|{RS_UPPER}|$)"
f"|{RS_MISC_UPPER}+{RS_OPT_CONTR_UPPER}(?={RS_BREAK}|{RS_UPPER}{RS_MISC_LOWER}|$)"
f"|{RS_UPPER}?{RS_MISC_LOWER}+{RS_OPT_CONTR_LOWER}"
f"|{RS_UPPER}+{RS_OPT_CONTR_UPPER}"
f"|{RS_ORD_UPPER}"
f"|{RS_ORD_LOWER}"
f"|{RS_DIGIT}+"
f"|{RS_EMOJI}"
f"/g"
)
# Compiled regexes for use in functions.
JS_RE_ASCII_WORDS = JSRegExp(RS_ASCII_WORDS)
JS_RE_UNICODE_WORDS = JSRegExp(RS_UNICODE_WORDS)
JS_RE_LATIN1 = JSRegExp(RS_LATIN1)
RE_HAS_UNICODE_WORD = re.compile(RS_HAS_UNICODE_WORD)
RE_APOS = re.compile(RS_APOS)
RE_HTML_TAGS = re.compile(r"<\/?[^>]+>")
[docs]
def camel_case(text: t.Any) -> str:
"""
Converts `text` to camel case.
Args:
text: String to convert.
Returns:
String converted to camel case.
Example:
>>> camel_case("FOO BAR_bAz")
'fooBarBAz'
.. versionadded:: 1.1.0
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
text = "".join(word.title() for word in compounder(text))
return text[:1].lower() + text[1:]
[docs]
def capitalize(text: t.Any, strict: bool = True) -> str:
"""
Capitalizes the first character of `text`.
Args:
text: String to capitalize.
strict: Whether to cast rest of string to lower case. Defaults to ``True``.
Returns:
Capitalized string.
Example:
>>> capitalize("once upon a TIME")
'Once upon a time'
>>> capitalize("once upon a TIME", False)
'Once upon a TIME'
.. versionadded:: 1.1.0
.. versionchanged:: 3.0.0
Added `strict` option.
"""
text = pyd.to_string(text)
return text.capitalize() if strict else text[:1].upper() + text[1:]
[docs]
def chars(text: t.Any) -> t.List[str]:
"""
Split `text` into a list of single characters.
Args:
text: String to split up.
Returns:
List of individual characters.
Example:
>>> chars("onetwo")
['o', 'n', 'e', 't', 'w', 'o']
.. versionadded:: 3.0.0
"""
return list(pyd.to_string(text))
[docs]
def chop(text: t.Any, step: int) -> t.List[str]:
"""
Break up `text` into intervals of length `step`.
Args:
text: String to chop.
step: Interval to chop `text`.
Returns:
List of chopped characters. If `text` is `None` an empty list is returned.
Example:
>>> chop("abcdefg", 3)
['abc', 'def', 'g']
.. versionadded:: 3.0.0
"""
if text is None:
return []
text = pyd.to_string(text)
if step <= 0:
chopped = [text]
else:
chopped = [text[i : i + step] for i in range(0, len(text), step)]
return chopped
[docs]
def chop_right(text: t.Any, step: int) -> t.List[str]:
"""
Like :func:`chop` except `text` is chopped from right.
Args:
text: String to chop.
step: Interval to chop `text`.
Returns:
List of chopped characters.
Example:
>>> chop_right("abcdefg", 3)
['a', 'bcd', 'efg']
.. versionadded:: 3.0.0
"""
if text is None:
return []
text = pyd.to_string(text)
if step <= 0:
chopped = [text]
else:
text_len = len(text)
chopped = [text[-(i + step) : text_len - i] for i in range(0, text_len, step)][::-1]
return chopped
[docs]
def clean(text: t.Any) -> str:
"""
Trim and replace multiple spaces with a single space.
Args:
text: String to clean.
Returns:
Cleaned string.
Example:
>>> clean("a b c d")
'a b c d'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return " ".join(pyd.compact(text.split()))
[docs]
def count_substr(text: t.Any, subtext: t.Any) -> int:
"""
Count the occurrences of `subtext` in `text`.
Args:
text: Source string to count from.
subtext: String to count.
Returns:
Number of occurrences of `subtext` in `text`.
Example:
>>> count_substr("aabbccddaabbccdd", "bc")
2
.. versionadded:: 3.0.0
"""
if text is None or subtext is None:
return 0
text = pyd.to_string(text)
subtext = pyd.to_string(subtext)
return text.count(subtext)
[docs]
def deburr(text: t.Any) -> str:
"""
Deburrs `text` by converting latin-1 supplementary letters to basic latin letters.
Args:
text: String to deburr.
Returns:
Deburred string.
Example:
>>> deburr("déjà vu")
'...
>>> "deja vu"
'deja vu'
.. versionadded:: 2.0.0
"""
text = pyd.to_string(text)
return JS_RE_LATIN1.replace(
text, lambda match: DEBURRED_LETTERS.get(match.group(), match.group())
)
[docs]
def decapitalize(text: t.Any) -> str:
"""
Decaptitalizes the first character of `text`.
Args:
text: String to decapitalize.
Returns:
Decapitalized string.
Example:
>>> decapitalize("FOO BAR")
'fOO BAR'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text[:1].lower() + text[1:]
[docs]
def ends_with(text: t.Any, target: t.Any, position: t.Union[int, None] = None) -> bool:
"""
Checks if `text` ends with a given target string.
Args:
text: String to check.
target: String to check for.
position: Position to search from. Defaults to end of `text`.
Returns:
Whether `text` ends with `target`.
Example:
>>> ends_with("abc def", "def")
True
>>> ends_with("abc def", 4)
False
.. versionadded:: 1.1.0
"""
target = pyd.to_string(target)
text = pyd.to_string(text)
if position is None:
position = len(text)
return text[:position].endswith(target)
[docs]
def ensure_ends_with(text: t.Any, suffix: t.Any) -> str:
"""
Append a given suffix to a string, but only if the source string does not end with that suffix.
Args:
text: Source string to append `suffix` to.
suffix: String to append to the source string if the source string does not end with
`suffix`.
Returns:
source string possibly extended by `suffix`.
Example:
>>> ensure_ends_with("foo bar", "!")
'foo bar!'
>>> ensure_ends_with("foo bar!", "!")
'foo bar!'
.. versionadded:: 2.4.0
"""
text = pyd.to_string(text)
suffix = pyd.to_string(suffix)
if text.endswith(suffix):
return text
return f"{text}{suffix}"
[docs]
def ensure_starts_with(text: t.Any, prefix: t.Any) -> str:
"""
Prepend a given prefix to a string, but only if the source string does not start with that
prefix.
Args:
text: Source string to prepend `prefix` to.
prefix: String to prepend to the source string if the source string does not start
with `prefix`.
Returns:
source string possibly prefixed by `prefix`
Example:
>>> ensure_starts_with("foo bar", "Oh my! ")
'Oh my! foo bar'
>>> ensure_starts_with("Oh my! foo bar", "Oh my! ")
'Oh my! foo bar'
.. versionadded:: 2.4.0
"""
text = pyd.to_string(text)
prefix = pyd.to_string(prefix)
if text.startswith(prefix):
return text
return f"{prefix}{text}"
[docs]
def escape(text: t.Any) -> str:
r"""
Converts the characters ``&``, ``<``, ``>``, ``"``, ``'``, and ``\``` in `text` to their
corresponding HTML entities.
Args:
text: String to escape.
Returns:
HTML escaped string.
Example:
>>> escape('"1 > 2 && 3 < 4"')
'"1 > 2 && 3 < 4"'
.. versionadded:: 1.0.0
.. versionchanged:: 1.1.0
Moved function to :mod:`pydash.strings`.
"""
text = pyd.to_string(text)
# NOTE: Not using html.escape because Lo-Dash escapes certain chars differently (e.g. `'` isn't
# escaped by html.escape() but is by Lo-Dash).
return "".join(HTML_ESCAPES.get(char, char) for char in text)
[docs]
def escape_reg_exp(text: t.Any) -> str:
"""
Escapes the RegExp special characters in `text`.
Args:
text: String to escape.
Returns:
RegExp escaped string.
Example:
>>> escape_reg_exp("[()]")
'\\\\[\\\\(\\\\)\\\\]'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Removed alias ``escape_re``
"""
text = pyd.to_string(text)
return re.escape(text)
[docs]
def has_substr(text: t.Any, subtext: t.Any) -> bool:
"""
Returns whether `subtext` is included in `text`.
Args:
text: String to search.
subtext: String to search for.
Returns:
Whether `subtext` is found in `text`.
Example:
>>> has_substr("abcdef", "bc")
True
>>> has_substr("abcdef", "bb")
False
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
subtext = pyd.to_string(subtext)
return text.find(subtext) >= 0
[docs]
def human_case(text: t.Any) -> str:
"""
Converts `text` to human case which has only the first letter capitalized and each word
separated by a space.
Args:
text: String to convert.
Returns:
String converted to human case.
Example:
>>> human_case("abc-def_hij lmn")
'Abc def hij lmn'
>>> human_case("user_id")
'User'
.. versionadded:: 3.0.0
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
return (
pyd.chain(text)
.snake_case()
.reg_exp_replace("_id$", "")
.replace("_", " ")
.capitalize()
.value()
)
[docs]
def insert_substr(text: t.Any, index: int, subtext: t.Any) -> str:
"""
Insert `subtext` in `text` starting at position `index`.
Args:
text: String to add substring to.
index: String index to insert into.
subtext: String to insert.
Returns:
Modified string.
Example:
>>> insert_substr("abcdef", 3, "--")
'abc--def'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
subtext = pyd.to_string(subtext)
return text[:index] + subtext + text[index:]
[docs]
def join(array: t.Iterable[t.Any], separator: t.Any = "") -> str:
"""
Joins an iterable into a string using `separator` between each element.
Args:
array: Iterable to implode.
separator: Separator to using when joining. Defaults to ``''``.
Returns:
Joined string.
Example:
>>> join(["a", "b", "c"]) == "abc"
True
>>> join([1, 2, 3, 4], "&") == "1&2&3&4"
True
>>> join("abcdef", "-") == "a-b-c-d-e-f"
True
.. versionadded:: 2.0.0
.. versionchanged:: 4.0.0
Removed alias ``implode``.
"""
return pyd.to_string(separator).join(pyd.map_(array or (), pyd.to_string))
[docs]
def kebab_case(text: t.Any) -> str:
"""
Converts `text` to kebab case (a.k.a. spinal case).
Args:
text: String to convert.
Returns:
String converted to kebab case.
Example:
>>> kebab_case("a b c_d-e!f")
'a-b-c-d-e-f'
.. versionadded:: 1.1.0
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
return "-".join(word.lower() for word in compounder(text) if word)
[docs]
def lines(text: t.Any) -> t.List[str]:
r"""
Split lines in `text` into an array.
Args:
text: String to split.
Returns:
String split by lines.
Example:
>>> lines("a\nb\r\nc")
['a', 'b', 'c']
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.splitlines()
[docs]
def lower_case(text: t.Any) -> str:
"""
Converts string to lower case as space separated words.
Args:
text: String to convert.
Returns:
String converted to lower case as space separated words.
Example:
>>> lower_case("fooBar")
'foo bar'
>>> lower_case("--foo-Bar--")
'foo bar'
>>> lower_case('/?*Foo10/;"B*Ar')
'foo 10 b ar'
.. versionadded:: 4.0.0
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
return " ".join(compounder(text)).lower()
[docs]
def lower_first(text: str) -> str:
"""
Converts the first character of string to lower case.
Args:
text: String passed in by the user.
Returns:
String in which the first character is converted to lower case.
Example:
>>> lower_first("FRED")
'fRED'
>>> lower_first("Foo Bar")
'foo Bar'
>>> lower_first("1foobar")
'1foobar'
>>> lower_first(";foobar")
';foobar'
.. versionadded:: 4.0.0
"""
return text[:1].lower() + text[1:]
[docs]
def pad(text: t.Any, length: int, chars: t.Any = " ") -> str:
"""
Pads `text` on the left and right sides if it is shorter than the given padding length. The
`chars` string may be truncated if the number of padding characters can't be evenly divided by
the padding length.
Args:
text: String to pad.
length: Amount to pad.
chars: Characters to pad with. Defaults to ``" "``.
Returns:
Padded string.
Example:
>>> pad("abc", 5)
' abc '
>>> pad("abc", 6, "x")
'xabcxx'
>>> pad("abc", 5, "...")
'.abc.'
.. versionadded:: 1.1.0
.. versionchanged:: 3.0.0
Fix handling of multiple `chars` so that padded string isn't over padded.
"""
# pylint: disable=redefined-outer-name
text = pyd.to_string(text)
text_len = len(text)
if text_len >= length:
return text
mid = (length - text_len) / 2.0
left_len = int(math.floor(mid))
right_len = int(math.ceil(mid))
chars = pad_end("", right_len, chars)
return chars[:left_len] + text + chars
[docs]
def pad_end(text: t.Any, length: int, chars: t.Any = " ") -> str:
"""
Pads `text` on the right side if it is shorter than the given padding length. The `chars` string
may be truncated if the number of padding characters can't be evenly divided by the padding
length.
Args:
text: String to pad.
length: Amount to pad.
chars: Characters to pad with. Defaults to ``" "``.
Returns:
Padded string.
Example:
>>> pad_end("abc", 5)
'abc '
>>> pad_end("abc", 5, ".")
'abc..'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Renamed from ``pad_right`` to ``pad_end``.
"""
# pylint: disable=redefined-outer-name
text = pyd.to_string(text)
length = max((length, len(text)))
return (text + repeat(chars, length))[:length]
[docs]
def pad_start(text: t.Any, length: int, chars: t.Any = " ") -> str:
"""
Pads `text` on the left side if it is shorter than the given padding length. The `chars` string
may be truncated if the number of padding characters can't be evenly divided by the padding
length.
Args:
text: String to pad.
length: Amount to pad.
chars: Characters to pad with. Defaults to ``" "``.
Returns:
Padded string.
Example:
>>> pad_start("abc", 5)
' abc'
>>> pad_start("abc", 5, ".")
'..abc'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Renamed from ``pad_left`` to ``pad_start``.
"""
# pylint: disable=redefined-outer-name
text = pyd.to_string(text)
length = max(length, len(text))
return (repeat(chars, length) + text)[-length:]
[docs]
def pascal_case(text: t.Any, strict: bool = True) -> str:
"""
Like :func:`camel_case` except the first letter is capitalized.
Args:
text: String to convert.
strict: Whether to cast rest of string to lower case. Defaults to ``True``.
Returns:
String converted to class case.
Example:
>>> pascal_case("FOO BAR_bAz")
'FooBarBaz'
>>> pascal_case("FOO BAR_bAz", False)
'FooBarBAz'
.. versionadded:: 3.0.0
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
text = pyd.to_string(text)
if strict:
text = text.lower()
return capitalize(camel_case(text), strict=False)
[docs]
def predecessor(char: t.Any) -> str:
"""
Return the predecessor character of `char`.
Args:
char: Character to find the predecessor of.
Returns:
Predecessor character.
Example:
>>> predecessor("c")
'b'
>>> predecessor("C")
'B'
>>> predecessor("3")
'2'
.. versionadded:: 3.0.0
"""
char = pyd.to_string(char)
return chr(ord(char) - 1)
[docs]
def prune(text: t.Any, length: int = 0, omission: str = "...") -> str:
"""
Like :func:`truncate` except it ensures that the pruned string doesn't exceed the original
length, i.e., it avoids half-chopped words when truncating. If the pruned text + `omission` text
is longer than the original text, then the original text is returned.
Args:
text: String to prune.
length: Target prune length. Defaults to ``0``.
omission: Omission text to append to the end of the pruned string. Defaults
to ``'...'``.
Returns:
Pruned string.
Example:
>>> prune("Fe fi fo fum", 5)
'Fe fi...'
>>> prune("Fe fi fo fum", 6)
'Fe fi...'
>>> prune("Fe fi fo fum", 7)
'Fe fi...'
>>> prune("Fe fi fo fum", 8, ",,,")
'Fe fi fo,,,'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
text_len = len(text)
omission_len = len(omission)
if text_len <= length:
return text
# Replace non-alphanumeric chars with whitespace.
def repl(match):
char = match.group(0)
return " " if char.upper() == char.lower() else char
subtext = reg_exp_replace(text[: length + 1], r".(?=\W*\w*$)", repl)
if re.match(r"\w\w", subtext[-2:]):
# Last two characters are alphanumeric. Remove last "word" from end of string so that we
# prune to the next whole word.
subtext = reg_exp_replace(subtext, r"\s*\S+$", "")
else:
# Last character (at least) is whitespace. So remove that character as well as any other
# whitespace.
subtext = subtext[:-1].rstrip()
subtext_len = len(subtext)
# Only add omission text if doing so will result in a string that is equal to or smaller than
# the original.
if (subtext_len + omission_len) <= text_len:
text = text[:subtext_len] + omission
return text
[docs]
def quote(text: t.Any, quote_char: t.Any = '"') -> str:
"""
Quote a string with another string.
Args:
text: String to be quoted.
quote_char: the quote character. Defaults to ``'"'``.
Returns:
the quoted string.
Example:
>>> quote("To be or not to be")
'"To be or not to be"'
>>> quote("To be or not to be", "'")
"'To be or not to be'"
.. versionadded:: 2.4.0
"""
return surround(text, quote_char)
[docs]
def reg_exp_js_match(text: t.Any, reg_exp: str) -> t.List[str]:
"""
Return list of matches using Javascript style regular expression.
Args:
text: String to evaluate.
reg_exp: Javascript style regular expression.
Returns:
List of matches.
Example:
>>> reg_exp_js_match("aaBBcc", "/bb/")
[]
>>> reg_exp_js_match("aaBBcc", "/bb/i")
['BB']
>>> reg_exp_js_match("aaBBccbb", "/bb/i")
['BB']
>>> reg_exp_js_match("aaBBccbb", "/bb/gi")
['BB', 'bb']
.. versionadded:: 2.0.0
.. versionchanged:: 3.0.0
Reordered arguments to make `text` first.
.. versionchanged:: 4.0.0
Renamed from ``js_match`` to ``reg_exp_js_match``.
"""
text = pyd.to_string(text)
return JSRegExp(reg_exp).find(text)
[docs]
def reg_exp_js_replace(
text: t.Any, reg_exp: str, repl: t.Union[str, t.Callable[[re.Match], str]]
) -> str:
"""
Replace `text` with `repl` using Javascript style regular expression to find matches.
Args:
text: String to evaluate.
reg_exp: Javascript style regular expression.
repl: Replacement string or callable.
Returns:
Modified string.
Example:
>>> reg_exp_js_replace("aaBBcc", "/bb/", "X")
'aaBBcc'
>>> reg_exp_js_replace("aaBBcc", "/bb/i", "X")
'aaXcc'
>>> reg_exp_js_replace("aaBBccbb", "/bb/i", "X")
'aaXccbb'
>>> reg_exp_js_replace("aaBBccbb", "/bb/gi", "X")
'aaXccX'
.. versionadded:: 2.0.0
.. versionchanged:: 3.0.0
Reordered arguments to make `text` first.
.. versionchanged:: 4.0.0
Renamed from ``js_replace`` to ``reg_exp_js_replace``.
"""
text = pyd.to_string(text)
if not pyd.is_function(repl):
repl = pyd.to_string(repl)
return JSRegExp(reg_exp).replace(text, repl)
[docs]
def reg_exp_replace(
text: t.Any,
pattern: t.Any,
repl: t.Union[str, t.Callable[[re.Match], str]],
ignore_case: bool = False,
count: int = 0,
) -> str:
"""
Replace occurrences of regex `pattern` with `repl` in `text`. Optionally, ignore case when
replacing. Optionally, set `count` to limit number of replacements.
Args:
text: String to replace.
pattern: Pattern to find and replace.
repl: String to substitute `pattern` with.
ignore_case: Whether to ignore case when replacing. Defaults to ``False``.
count: Maximum number of occurrences to replace. Defaults to ``0`` which
replaces all.
Returns:
Replaced string.
Example:
>>> reg_exp_replace("aabbcc", "b", "X")
'aaXXcc'
>>> reg_exp_replace("aabbcc", "B", "X", ignore_case=True)
'aaXXcc'
>>> reg_exp_replace("aabbcc", "b", "X", count=1)
'aaXbcc'
>>> reg_exp_replace("aabbcc", "[ab]", "X")
'XXXXcc'
.. versionadded:: 3.0.0
.. versionchanged:: 4.0.0
Renamed from ``re_replace`` to ``reg_exp_replace``.
"""
if pattern is None:
return pyd.to_string(text)
return replace(text, pattern, repl, ignore_case=ignore_case, count=count, escape=False)
[docs]
def repeat(text: t.Any, n: t.SupportsInt = 0) -> str:
"""
Repeats the given string `n` times.
Args:
text: String to repeat.
n: Number of times to repeat the string.
Returns:
Repeated string.
Example:
>>> repeat(".", 5)
'.....'
.. versionadded:: 1.1.0
"""
return pyd.to_string(text) * int(n)
[docs]
def replace(
text: t.Any,
pattern: t.Any,
repl: t.Union[str, t.Callable[[re.Match], str]],
ignore_case: bool = False,
count: int = 0,
escape: bool = True,
from_start: bool = False,
from_end: bool = False,
) -> str:
"""
Replace occurrences of `pattern` with `repl` in `text`. Optionally, ignore case when replacing.
Optionally, set `count` to limit number of replacements.
Args:
text: String to replace.
pattern: Pattern to find and replace.
repl: String to substitute `pattern` with.
ignore_case: Whether to ignore case when replacing. Defaults to ``False``.
count: Maximum number of occurrences to replace. Defaults to ``0`` which
replaces all.
escape: Whether to escape `pattern` when searching. This is needed if a
literal replacement is desired when `pattern` may contain special regular expression
characters. Defaults to ``True``.
from_start: Whether to limit replacement to start of string.
from_end: Whether to limit replacement to end of string.
Returns:
Replaced string.
Example:
>>> replace("aabbcc", "b", "X")
'aaXXcc'
>>> replace("aabbcc", "B", "X", ignore_case=True)
'aaXXcc'
>>> replace("aabbcc", "b", "X", count=1)
'aaXbcc'
>>> replace("aabbcc", "[ab]", "X")
'aabbcc'
>>> replace("aabbcc", "[ab]", "X", escape=False)
'XXXXcc'
.. versionadded:: 3.0.0
.. versionchanged:: 4.1.0
Added ``from_start`` and ``from_end`` arguments.
.. versionchanged:: 5.0.0
Added support for ``pattern`` as ``typing.Pattern`` object.
"""
text = pyd.to_string(text)
if pattern is None:
return text
if not pyd.is_function(repl):
repl = pyd.to_string(repl)
flags = re.IGNORECASE if ignore_case else 0
if isinstance(pattern, typing.Pattern):
pat = pattern
else:
pattern = pyd.to_string(pattern)
if escape:
pattern = re.escape(pattern)
if from_start and not pattern.startswith("^"):
pattern = "^" + pattern
if from_end and not pattern.endswith("$"):
pattern += "$"
pat = re.compile(pattern, flags=flags)
return pat.sub(repl, text, count=count)
[docs]
def replace_end(
text: t.Any,
pattern: t.Any,
repl: t.Union[str, t.Callable[[re.Match], str]],
ignore_case: bool = False,
escape: bool = True,
) -> str:
"""
Like :func:`replace` except it only replaces `text` with `repl` if `pattern` mathces the end of
`text`.
Args:
text: String to replace.
pattern: Pattern to find and replace.
repl: String to substitute `pattern` with.
ignore_case: Whether to ignore case when replacing. Defaults to ``False``.
escape: Whether to escape `pattern` when searching. This is needed if a
literal replacement is desired when `pattern` may contain special regular expression
characters. Defaults to ``True``.
Returns:
Replaced string.
Example:
>>> replace_end("aabbcc", "b", "X")
'aabbcc'
>>> replace_end("aabbcc", "c", "X")
'aabbcX'
.. versionadded:: 4.1.0
"""
return replace(text, pattern, repl, ignore_case=ignore_case, escape=escape, from_end=True)
[docs]
def replace_start(
text: t.Any,
pattern: t.Any,
repl: t.Union[str, t.Callable[[re.Match], str]],
ignore_case: bool = False,
escape: bool = True,
) -> str:
"""
Like :func:`replace` except it only replaces `text` with `repl` if `pattern` mathces the start
of `text`.
Args:
text: String to replace.
pattern: Pattern to find and replace.
repl: String to substitute `pattern` with.
ignore_case: Whether to ignore case when replacing. Defaults to ``False``.
escape: Whether to escape `pattern` when searching. This is needed if a
literal replacement is desired when `pattern` may contain special regular expression
characters. Defaults to ``True``.
Returns:
Replaced string.
Example:
>>> replace_start("aabbcc", "b", "X")
'aabbcc'
>>> replace_start("aabbcc", "a", "X")
'Xabbcc'
.. versionadded:: 4.1.0
"""
return replace(text, pattern, repl, ignore_case=ignore_case, escape=escape, from_start=True)
[docs]
def separator_case(text: t.Any, separator: str) -> str:
"""
Splits `text` on words and joins with `separator`.
Args:
text: String to convert.
separator: Separator to join words with.
Returns:
Converted string.
Example:
>>> separator_case("a!!b___c.d", "-")
'a-b-c-d'
.. versionadded:: 3.0.0
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
return separator.join(word.lower() for word in words(text) if word)
[docs]
def series_phrase(
items: t.List[t.Any],
separator: t.Any = ", ",
last_separator: t.Any = " and ",
serial: bool = False,
) -> str:
"""
Join items into a grammatical series phrase, e.g., ``"item1, item2, item3 and item4"``.
Args:
items: List of string items to join.
separator: Item separator. Defaults to ``', '``.
last_separator: Last item separator. Defaults to ``' and '``.
serial: Whether to include `separator` with `last_separator` when number of
items is greater than 2. Defaults to ``False``.
Returns:
Joined string.
Example:
>>> series_phrase(["apples", "bananas", "peaches"])
'apples, bananas and peaches'
>>> series_phrase(["apples", "bananas", "peaches"], serial=True)
'apples, bananas, and peaches'
>>> series_phrase(["apples", "bananas", "peaches"], "; ", ", or ")
'apples; bananas, or peaches'
.. versionadded:: 3.0.0
"""
items = pyd.chain(items).map(pyd.to_string).compact().value()
item_count = len(items)
separator = pyd.to_string(separator)
last_separator = pyd.to_string(last_separator)
if item_count > 2 and serial:
last_separator = separator.rstrip() + last_separator
if item_count >= 2:
items = items[:-2] + [last_separator.join(items[-2:])]
return separator.join(items)
[docs]
def series_phrase_serial(
items: t.List[t.Any], separator: t.Any = ", ", last_separator: t.Any = " and "
) -> str:
"""
Join items into a grammatical series phrase using a serial separator, e.g., ``"item1, item2,
item3, and item4"``.
Args:
items: List of string items to join.
separator: Item separator. Defaults to ``', '``.
last_separator: Last item separator. Defaults to ``' and '``.
Returns:
Joined string.
Example:
>>> series_phrase_serial(["apples", "bananas", "peaches"])
'apples, bananas, and peaches'
.. versionadded:: 3.0.0
"""
return series_phrase(items, separator, last_separator, serial=True)
[docs]
def slugify(text: t.Any, separator: str = "-") -> str:
"""
Convert `text` into an ASCII slug which can be used safely in URLs. Incoming `text` is converted
to unicode and noramlzied using the ``NFKD`` form. This results in some accented characters
being converted to their ASCII "equivalent" (e.g. ``é`` is converted to ``e``). Leading and
trailing whitespace is trimmed and any remaining whitespace or other special characters without
an ASCII equivalent are replaced with ``-``.
Args:
text: String to slugify.
separator: Separator to use. Defaults to ``'-'``.
Returns:
Slugified string.
Example:
>>> slugify("This is a slug.") == "this-is-a-slug"
True
>>> slugify("This is a slug.", "+") == "this+is+a+slug"
True
.. versionadded:: 3.0.0
.. versionchanged:: 5.0.0
Improved unicode word support.
.. versionchanged:: 7.0.0
Remove single quotes from output.
"""
normalized = (
unicodedata.normalize("NFKD", pyd.to_string(text))
.encode("ascii", "ignore")
.decode("utf8")
.replace("'", "")
)
return separator_case(normalized, separator)
[docs]
def snake_case(text: t.Any) -> str:
"""
Converts `text` to snake case.
Args:
text: String to convert.
Returns:
String converted to snake case.
Example:
>>> snake_case("This is Snake Case!")
'this_is_snake_case'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Removed alias ``underscore_case``.
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
return "_".join(word.lower() for word in compounder(text) if word)
[docs]
def split(text: t.Any, separator: t.Union[str, Unset, None] = UNSET) -> t.List[str]:
"""
Splits `text` on `separator`. If `separator` not provided, then `text` is split on whitespace.
If `separator` is falsey, then `text` is split on every character.
Args:
text: String to explode.
separator: Separator string to split on. Defaults to ``NoValue``.
Returns:
Split string.
Example:
>>> split("one potato, two potatoes, three potatoes, four!")
['one', 'potato,', 'two', 'potatoes,', 'three', 'potatoes,', 'four!']
>>> split("one potato, two potatoes, three potatoes, four!", ",")
['one potato', ' two potatoes', ' three potatoes', ' four!']
.. versionadded:: 2.0.0
.. versionchanged:: 3.0.0
Changed `separator` default to ``NoValue`` and supported splitting on whitespace by default.
.. versionchanged:: 4.0.0
Removed alias ``explode``.
"""
text = pyd.to_string(text)
if separator is UNSET:
ret = text.split()
elif separator:
ret = text.split(separator)
else:
ret = chars(text)
return ret
[docs]
def start_case(text: t.Any) -> str:
"""
Convert `text` to start case.
Args:
text: String to convert.
Returns:
String converted to start case.
Example:
>>> start_case("fooBar")
'Foo Bar'
.. versionadded:: 3.1.0
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
return " ".join(capitalize(word, strict=False) for word in compounder(text))
[docs]
def starts_with(text: t.Any, target: t.Any, position: int = 0) -> bool:
"""
Checks if `text` starts with a given target string.
Args:
text: String to check.
target: String to check for.
position: Position to search from. Defaults to beginning of `text`.
Returns:
Whether `text` starts with `target`.
Example:
>>> starts_with("abcdef", "a")
True
>>> starts_with("abcdef", "b")
False
>>> starts_with("abcdef", "a", 1)
False
.. versionadded:: 1.1.0
"""
text = pyd.to_string(text)
target = pyd.to_string(target)
return text[position:].startswith(target)
[docs]
def substr_left(text: t.Any, subtext: str) -> str:
"""
Searches `text` from left-to-right for `subtext` and returns a substring consisting of the
characters in `text` that are to the left of `subtext` or all string if no match found.
Args:
text: String to partition.
subtext: String to search for.
Returns:
Substring to left of `subtext`.
Example:
>>> substr_left("abcdefcdg", "cd")
'ab'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.partition(subtext)[0] if subtext else text
[docs]
def substr_left_end(text: t.Any, subtext: str) -> str:
"""
Searches `text` from right-to-left for `subtext` and returns a substring consisting of the
characters in `text` that are to the left of `subtext` or all string if no match found.
Args:
text: String to partition.
subtext: String to search for.
Returns:
Substring to left of `subtext`.
Example:
>>> substr_left_end("abcdefcdg", "cd")
'abcdef'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.rpartition(subtext)[0] or text if subtext else text
[docs]
def substr_right(text: t.Any, subtext: str) -> str:
"""
Searches `text` from right-to-left for `subtext` and returns a substring consisting of the
characters in `text` that are to the right of `subtext` or all string if no match found.
Args:
text: String to partition.
subtext: String to search for.
Returns:
Substring to right of `subtext`.
Example:
>>> substr_right("abcdefcdg", "cd")
'efcdg'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.partition(subtext)[2] or text if subtext else text
[docs]
def substr_right_end(text: t.Any, subtext: str) -> str:
"""
Searches `text` from left-to-right for `subtext` and returns a substring consisting of the
characters in `text` that are to the right of `subtext` or all string if no match found.
Args:
text: String to partition.
subtext: String to search for.
Returns:
Substring to right of `subtext`.
Example:
>>> substr_right_end("abcdefcdg", "cd")
'g'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.rpartition(subtext)[2] if subtext else text
[docs]
def successor(char: t.Any) -> str:
"""
Return the successor character of `char`.
Args:
char: Character to find the successor of.
Returns:
Successor character.
Example:
>>> successor("b")
'c'
>>> successor("B")
'C'
>>> successor("2")
'3'
.. versionadded:: 3.0.0
"""
char = pyd.to_string(char)
return chr(ord(char) + 1)
[docs]
def surround(text: t.Any, wrapper: t.Any) -> str:
"""
Surround a string with another string.
Args:
text: String to surround with `wrapper`.
wrapper: String by which `text` is to be surrounded.
Returns:
Surrounded string.
Example:
>>> surround("abc", '"')
'"abc"'
>>> surround("abc", "!")
'!abc!'
.. versionadded:: 2.4.0
"""
text = pyd.to_string(text)
wrapper = pyd.to_string(wrapper)
return f"{wrapper}{text}{wrapper}"
[docs]
def swap_case(text: t.Any) -> str:
"""
Swap case of `text` characters.
Args:
text: String to swap case.
Returns:
String with swapped case.
Example:
>>> swap_case("aBcDeF")
'AbCdEf'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.swapcase()
[docs]
def title_case(text: t.Any) -> str:
"""
Convert `text` to title case.
Args:
text: String to convert.
Returns:
String converted to title case.
Example:
>>> title_case("bob's shop")
"Bob's Shop"
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
# NOTE: Can't use text.title() since it doesn't handle apostrophes.
return " ".join(word.capitalize() for word in re.split(" ", text))
[docs]
def to_lower(text: t.Any) -> str:
"""
Converts the given :attr:`text` to lower text.
Args:
text: String to convert.
Returns:
String converted to lower case.
Example:
>>> to_lower("--Foo-Bar--")
'--foo-bar--'
>>> to_lower("fooBar")
'foobar'
>>> to_lower("__FOO_BAR__")
'__foo_bar__'
.. versionadded:: 4.0.0
"""
return pyd.to_string(text).lower()
[docs]
def to_upper(text: t.Any) -> str:
"""
Converts the given :attr:`text` to upper text.
Args:
text: String to convert.
Returns:
String converted to upper case.
Example:
>>> to_upper("--Foo-Bar--")
'--FOO-BAR--'
>>> to_upper("fooBar")
'FOOBAR'
>>> to_upper("__FOO_BAR__")
'__FOO_BAR__'
.. versionadded:: 4.0.0
"""
return pyd.to_string(text).upper()
[docs]
def trim(text: t.Any, chars: t.Union[str, None] = None) -> str:
r"""
Removes leading and trailing whitespace or specified characters from `text`.
Args:
text: String to trim.
chars: Specific characters to remove.
Returns:
Trimmed string.
Example:
>>> trim(" abc efg\r\n ")
'abc efg'
.. versionadded:: 1.1.0
"""
# pylint: disable=redefined-outer-name
text = pyd.to_string(text)
return text.strip(chars)
[docs]
def trim_end(text: t.Any, chars: t.Union[str, None] = None) -> str:
r"""
Removes trailing whitespace or specified characters from `text`.
Args:
text: String to trim.
chars: Specific characters to remove.
Returns:
Trimmed string.
Example:
>>> trim_end(" abc efg\r\n ")
' abc efg'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Renamed from ``trim_right`` to ``trim_end``.
"""
text = pyd.to_string(text)
return text.rstrip(chars)
[docs]
def trim_start(text: t.Any, chars: t.Union[str, None] = None) -> str:
r"""
Removes leading whitespace or specified characters from `text`.
Args:
text: String to trim.
chars: Specific characters to remove.
Returns:
Trimmed string.
Example:
>>> trim_start(" abc efg\r\n ")
'abc efg\r\n '
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Renamed from ``trim_left`` to ``trim_start``.
"""
text = pyd.to_string(text)
return text.lstrip(chars)
[docs]
def truncate(
text: t.Any,
length: int = 30,
omission: str = "...",
separator: t.Union[str, re.Pattern, None] = None,
) -> str:
"""
Truncates `text` if it is longer than the given maximum string length. The last characters of
the truncated string are replaced with the omission string which defaults to ``...``.
Args:
text: String to truncate.
length: Maximum string length. Defaults to ``30``.
omission: String to indicate text is omitted.
separator: Separator pattern to truncate to.
Returns:
Truncated string.
Example:
>>> truncate("hello world", 5)
'he...'
>>> truncate("hello world", 5, "..")
'hel..'
>>> truncate("hello world", 10)
'hello w...'
>>> truncate("hello world", 10, separator=" ")
'hello...'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Removed alias ``trunc``.
"""
text = pyd.to_string(text)
if len(text) <= length:
return text
omission_len = len(omission)
text_len = length - omission_len
text = text[:text_len]
trunc_len = len(text)
if pyd.is_string(separator):
trunc_len = text.rfind(separator)
elif pyd.is_reg_exp(separator):
last = None
for match in separator.finditer(text):
last = match
if last is not None:
trunc_len = last.start()
return text[:trunc_len] + omission
[docs]
def unescape(text: t.Any) -> str:
"""
The inverse of :func:`escape`. This method converts the HTML entities ``&``, ``<``,
``>``, ``"``, ``'``, and ````` in `text` to their corresponding characters.
Args:
text: String to unescape.
Returns:
HTML unescaped string.
Example:
>>> results = unescape(""1 > 2 && 3 < 4"")
>>> results == '"1 > 2 && 3 < 4"'
True
.. versionadded:: 1.0.0
.. versionchanged:: 1.1.0
Moved to :mod:`pydash.strings`.
"""
text = pyd.to_string(text)
return html.unescape(text)
[docs]
def upper_case(text: t.Any) -> str:
"""
Converts string to upper case, as space separated words.
Args:
text: String to be converted to uppercase.
Returns:
String converted to uppercase, as space separated words.
Example:
>>> upper_case("--foo-bar--")
'FOO BAR'
>>> upper_case("fooBar")
'FOO BAR'
>>> upper_case('/?*Foo10/;"B*Ar')
'FOO 10 B AR'
.. versionadded:: 4.0.0
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
return " ".join(compounder(text)).upper()
[docs]
def upper_first(text: str) -> str:
"""
Converts the first character of string to upper case.
Args:
text: String passed in by the user.
Returns:
String in which the first character is converted to upper case.
Example:
>>> upper_first("fred")
'Fred'
>>> upper_first("foo bar")
'Foo bar'
>>> upper_first("1foobar")
'1foobar'
>>> upper_first(";foobar")
';foobar'
.. versionadded:: 4.0.0
"""
return text[:1].upper() + text[1:]
[docs]
def unquote(text: t.Any, quote_char: t.Any = '"') -> str:
"""
Unquote `text` by removing `quote_char` if `text` begins and ends with it.
Args:
text: String to unquote.
quote_char: Quote character to remove. Defaults to `"`.
Returns:
Unquoted string.
Example:
>>> unquote('"abc"')
'abc'
>>> unquote('"abc"', "#")
'"abc"'
>>> unquote("#abc", "#")
'#abc'
>>> unquote("#abc#", "#")
'abc'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
inner = text[1:-1]
if text == f"{quote_char}{inner}{quote_char}":
text = inner
return text
[docs]
def url(*paths: t.Any, **params: t.Any) -> str:
"""
Combines a series of URL paths into a single URL. Optionally, pass in keyword arguments to
append query parameters.
Args:
paths: URL paths to combine.
Keyword Args:
params: Query parameters.
Returns:
URL string.
Example:
>>> link = url("a", "b", ["c", "d"], "/", q="X", y="Z")
>>> path, params = link.split("?")
>>> path == "a/b/c/d/"
True
>>> set(params.split("&")) == set(["q=X", "y=Z"])
True
.. versionadded:: 2.2.0
"""
# allow reassignment different type
paths = pyd.chain(paths).flatten_deep().map(pyd.to_string).value() # type: ignore
paths_list = []
params_list = flatten_url_params(params)
for path in paths:
scheme, netloc, path, query, fragment = urlsplit(path)
query = parse_qsl(query)
params_list += query
paths_list.append(urlunsplit((scheme, netloc, path, "", fragment)))
path = delimitedpathjoin("/", *paths_list)
scheme, netloc, path, query, fragment = urlsplit(path)
query = urlencode(params_list)
return urlunsplit((scheme, netloc, path, query, fragment))
[docs]
def words(text: t.Any, pattern: t.Union[str, None] = None) -> t.List[str]:
"""
Return list of words contained in `text`.
References:
https://github.com/lodash/lodash/blob/master/words.js#L30
Args:
text: String to split.
pattern: Custom pattern to split words on. Defaults to ``None``.
Returns:
List of words.
Example:
>>> words("a b, c; d-e")
['a', 'b', 'c', 'd', 'e']
>>> words("fred, barney, & pebbles", "/[^, ]+/g")
['fred', 'barney', '&', 'pebbles']
.. versionadded:: 2.0.0
.. versionchanged:: 3.2.0
Added `pattern` argument.
.. versionchanged:: 3.2.0
Improved matching for one character words.
.. versionchanged:: 5.0.0
Improved unicode word support.
"""
text = pyd.to_string(text)
if pattern is None:
if has_unicode_word(text):
reg_exp = JS_RE_UNICODE_WORDS
else:
reg_exp = JS_RE_ASCII_WORDS
else:
reg_exp = JSRegExp(pattern)
return reg_exp.find(text)
#
# Utility functions not a part of main API
#
def compounder(text):
"""
Remove single quote before passing into words() to match Lodash-style outputs.
Required by certain functions such as kebab_case, camel_case, start_case etc.
References:
https://github.com/lodash/lodash/blob/4.17.15/lodash.js#L4968
"""
return words(deburr(RE_APOS.sub("", pyd.to_string(text))))
def has_unicode_word(text):
"""
Check if the text contains unicode or requires more complex regex to handle.
References:
https://github.com/lodash/lodash/blob/master/words.js#L3
"""
result = RE_HAS_UNICODE_WORD.search(text)
return bool(result)
def delimitedpathjoin(delimiter, *paths):
"""
Join delimited path using specified delimiter.
>>> assert delimitedpathjoin(".", "") == ""
>>> assert delimitedpathjoin(".", ".") == "."
>>> assert delimitedpathjoin(".", ["", ".a"]) == ".a"
>>> assert delimitedpathjoin(".", ["a", "."]) == "a."
>>> assert delimitedpathjoin(".", ["", ".a", "", "", "b"]) == ".a.b"
>>> ret = ".a.b.c.d.e."
>>> assert delimitedpathjoin(".", [".a.", "b.", ".c", "d", "e."]) == ret
>>> assert delimitedpathjoin(".", ["a", "b", "c"]) == "a.b.c"
>>> ret = "a.b.c.d.e.f"
>>> assert delimitedpathjoin(".", ["a.b", ".c.d.", ".e.f"]) == ret
>>> ret = ".a.b.c.1."
>>> assert delimitedpathjoin(".", ".", "a", "b", "c", 1, ".") == ret
>>> assert delimitedpathjoin(".", []) == ""
"""
paths = [pyd.to_string(path) for path in pyd.flatten_deep(paths) if path]
if len(paths) == 1:
# Special case where there's no need to join anything. Doing this because if
# path==[delimiter], then an extra delimiter would be added if the else clause ran instead.
path = paths[0]
else:
leading = delimiter if paths and paths[0].startswith(delimiter) else ""
trailing = delimiter if paths and paths[-1].endswith(delimiter) else ""
middle = delimiter.join([path.strip(delimiter) for path in paths if path.strip(delimiter)])
path = "".join([leading, middle, trailing])
return path
def flatten_url_params(
params: t.Union[
t.Dict[T, t.Union[T2, t.Iterable[T2]]],
t.List[t.Tuple[T, t.Union[T2, t.Iterable[T2]]]],
],
) -> t.List[t.Tuple[T, T2]]:
"""
Flatten URL params into list of tuples. If any param value is a list or tuple, then map each
value to the param key.
>>> params = [("a", 1), ("a", [2, 3])]
>>> assert flatten_url_params(params) == [("a", 1), ("a", 2), ("a", 3)]
>>> params = {"a": [1, 2, 3]}
>>> assert flatten_url_params(params) == [("a", 1), ("a", 2), ("a", 3)]
"""
if isinstance(params, dict):
params = list(params.items())
flattened: t.List = []
for param, value in params:
if isinstance(value, (list, tuple)):
flattened += zip([param] * len(value), value)
else:
flattened.append((param, value))
return flattened