Source code for pydash.strings

"""
String functions.

.. versionadded:: 1.1.0
"""

import html
import math
import re
import typing
import unicodedata
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

import pydash as pyd

from .helpers import UNSET


__all__ = (
    "camel_case",
    "capitalize",
    "chop",
    "chop_right",
    "chars",
    "clean",
    "count_substr",
    "deburr",
    "decapitalize",
    "ends_with",
    "ensure_ends_with",
    "ensure_starts_with",
    "escape",
    "escape_reg_exp",
    "has_substr",
    "human_case",
    "insert_substr",
    "join",
    "kebab_case",
    "lines",
    "lower_case",
    "lower_first",
    "number_format",
    "pad",
    "pad_end",
    "pad_start",
    "pascal_case",
    "predecessor",
    "prune",
    "quote",
    "reg_exp_js_match",
    "reg_exp_js_replace",
    "reg_exp_replace",
    "repeat",
    "replace",
    "replace_end",
    "replace_start",
    "separator_case",
    "series_phrase",
    "series_phrase_serial",
    "slugify",
    "snake_case",
    "split",
    "start_case",
    "starts_with",
    "strip_tags",
    "substr_left",
    "substr_left_end",
    "substr_right",
    "substr_right_end",
    "successor",
    "surround",
    "swap_case",
    "title_case",
    "to_lower",
    "to_upper",
    "trim",
    "trim_end",
    "trim_start",
    "truncate",
    "unescape",
    "unquote",
    "upper_case",
    "upper_first",
    "url",
    "words",
)


class JSRegExp:
    """
    Javascript-style regular expression pattern.

    Converts a Javascript-style regular expression to the equivalent Python version.
    """

    def __init__(self, reg_exp):
        pattern, options = reg_exp[1:].rsplit("/", 1)

        self._global = "g" in options
        self._ignore_case = "i" in options

        flags = re.I if self._ignore_case else 0
        self.pattern = re.compile(pattern, flags=flags)

    def find(self, text):
        """Return list of regular expression matches."""
        if self._global:
            results = self.pattern.findall(text)
        else:
            res = self.pattern.search(text)
            if res:
                results = [res.group()]
            else:
                results = []
        return results

    def replace(self, text, repl):
        """Replace parts of text that match the regular expression."""
        count = 0 if self._global else 1
        return self.pattern.sub(repl, text, count=count)
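
# Illustrative usage sketch (assumed values, based only on the flag handling defined
# above): "g" selects findall()/sub(count=0) instead of a single search()/sub(count=1),
# and "i" maps to re.I.
#
#     >>> JSRegExp('/ab/gi').find('ABabAB')
#     ['AB', 'ab', 'AB']
#     >>> JSRegExp('/ab/i').replace('ABab', 'X')
#     'Xab'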


HTML_ESCAPES = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;", "`": "&#96;"}

DEBURRED_LETTERS = {
    "\xC0": "A",
    "\xC1": "A",
    "\xC2": "A",
    "\xC3": "A",
    "\xC4": "A",
    "\xC5": "A",
    "\xE0": "a",
    "\xE1": "a",
    "\xE2": "a",
    "\xE3": "a",
    "\xE4": "a",
    "\xE5": "a",
    "\xC7": "C",
    "\xE7": "c",
    "\xD0": "D",
    "\xF0": "d",
    "\xC8": "E",
    "\xC9": "E",
    "\xCA": "E",
    "\xCB": "E",
    "\xE8": "e",
    "\xE9": "e",
    "\xEA": "e",
    "\xEB": "e",
    "\xCC": "I",
    "\xCD": "I",
    "\xCE": "I",
    "\xCF": "I",
    "\xEC": "i",
    "\xED": "i",
    "\xEE": "i",
    "\xEF": "i",
    "\xD1": "N",
    "\xF1": "n",
    "\xD2": "O",
    "\xD3": "O",
    "\xD4": "O",
    "\xD5": "O",
    "\xD6": "O",
    "\xD8": "O",
    "\xF2": "o",
    "\xF3": "o",
    "\xF4": "o",
    "\xF5": "o",
    "\xF6": "o",
    "\xF8": "o",
    "\xD9": "U",
    "\xDA": "U",
    "\xDB": "U",
    "\xDC": "U",
    "\xF9": "u",
    "\xFA": "u",
    "\xFB": "u",
    "\xFC": "u",
    "\xDD": "Y",
    "\xFD": "y",
    "\xFF": "y",
    "\xC6": "Ae",
    "\xE6": "ae",
    "\xDE": "Th",
    "\xFE": "th",
    "\xDF": "ss",
    "\xD7": " ",
    "\xF7": " ",
}

# Use Javascript style regex to make Lo-Dash compatibility easier.
# Lodash Regex definitions: https://github.com/lodash/lodash/blob/master/.internal/unicodeWords.js

# References: https://github.com/lodash/lodash/blob/master/words.js#L8
RS_ASCII_WORDS = "/[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g"
RS_LATIN1 = "/[\xC0-\xFF]/g"

# Used to compose unicode character classes.
RS_ASTRAL_RANGE = "\\ud800-\\udfff"
RS_COMBO_MARKS_RANGE = "\\u0300-\\u036f"
RE_COMBO_HALF_MARKS_RANGE = "\\ufe20-\\ufe2f"
RS_COMBO_SYMBOLS_RANGE = "\\u20d0-\\u20ff"
RS_COMBO_MARKS_EXTENDED_RANGE = "\\u1ab0-\\u1aff"
RS_COMBO_MARKS_SUPPLEMENT_RANGE = "\\u1dc0-\\u1dff"
RS_COMBO_RANGE = (
    RS_COMBO_MARKS_RANGE
    + RE_COMBO_HALF_MARKS_RANGE
    + RS_COMBO_SYMBOLS_RANGE
    + RS_COMBO_MARKS_EXTENDED_RANGE
    + RS_COMBO_MARKS_SUPPLEMENT_RANGE
)
RS_DINGBAT_RANGE = "\\u2700-\\u27bf"
RS_LOWER_RANGE = "a-z\\xdf-\\xf6\\xf8-\\xff"
RS_MATH_OP_RANGE = "\\xac\\xb1\\xd7\\xf7"
RS_NON_CHAR_RANGE = "\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf"
RS_PUNCTUATION_RANGE = "\\u2000-\\u206f"
RS_SPACE_RANGE = (
    " \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\"
    "u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\"
    "u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000"
)
RS_UPPER_RANGE = "A-Z\\xc0-\\xd6\\xd8-\\xde"
RS_VAR_RANGE = "\\ufe0e\\ufe0f"
RS_BREAK_RANGE = RS_MATH_OP_RANGE + RS_NON_CHAR_RANGE + RS_PUNCTUATION_RANGE + RS_SPACE_RANGE

# Used to compose unicode capture groups.
RS_APOS = "['\u2019]"
RS_BREAK = f"[{RS_BREAK_RANGE}]"
RS_COMBO = f"[{RS_COMBO_RANGE}]"
RS_DIGIT = "\\d"
RS_DINGBAT = f"[{RS_DINGBAT_RANGE}]"
RS_LOWER = f"[{RS_LOWER_RANGE}]"
RS_MISC = (
    f"[^{RS_ASTRAL_RANGE}{RS_BREAK_RANGE}{RS_DIGIT}"
    f"{RS_DINGBAT_RANGE}{RS_LOWER_RANGE}{RS_UPPER_RANGE}]"
)
RS_FITZ = "\\ud83c[\\udffb-\\udfff]"
RS_MODIFIER = f"(?:{RS_COMBO}|{RS_FITZ})"
RS_NON_ASTRAL = f"[^{RS_ASTRAL_RANGE}]"
RS_REGIONAL = "(?:\\ud83c[\\udde6-\\uddff]){2}"
RS_SURR_PAIR = "[\\ud800-\\udbff][\\udc00-\\udfff]"
RS_UPPER = f"[{RS_UPPER_RANGE}]"
RS_ZWJ = "\\u200d"

# Used to compose unicode regexes.
RS_MISC_LOWER = f"(?:{RS_LOWER}|{RS_MISC})"
RS_MISC_UPPER = f"(?:{RS_UPPER}|{RS_MISC})"
RS_OPT_CONTR_LOWER = f"(?:{RS_APOS}(?:d|ll|m|re|s|t|ve))?"
RS_OPT_CONTR_UPPER = f"(?:{RS_APOS}(?:D|LL|M|RE|S|T|VE))?"
RE_OPT_MOD = f"{RS_MODIFIER}?"
RS_OPT_VAR = f"[{RS_VAR_RANGE}]?"
RS_OPT_JOIN = (
    f"(?:{RS_ZWJ}(?:{RS_NON_ASTRAL}|{RS_REGIONAL}|{RS_SURR_PAIR}){RS_OPT_VAR}{RE_OPT_MOD})*"
)
RS_ORD_LOWER = "\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])"
RS_ORD_UPPER = "\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])"
RS_SEQ = RS_OPT_VAR + RE_OPT_MOD + RS_OPT_JOIN
RS_EMOJI = f"(?:{RS_DINGBAT}|{RS_REGIONAL}|{RS_SURR_PAIR}){RS_SEQ}"

RS_HAS_UNICODE_WORD = "[a-z][A-Z]|[A-Z]{2}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]"
RS_UNICODE_WORDS = (
    f"/"
    f"{RS_UPPER}?{RS_LOWER}+{RS_OPT_CONTR_LOWER}(?={RS_BREAK}|{RS_UPPER}|$)"
    f"|{RS_MISC_UPPER}+{RS_OPT_CONTR_UPPER}(?={RS_BREAK}|{RS_UPPER}{RS_MISC_LOWER}|$)"
    f"|{RS_UPPER}?{RS_MISC_LOWER}+{RS_OPT_CONTR_LOWER}"
    f"|{RS_UPPER}+{RS_OPT_CONTR_UPPER}"
    f"|{RS_ORD_UPPER}"
    f"|{RS_ORD_LOWER}"
    f"|{RS_DIGIT}+"
    f"|{RS_EMOJI}"
    f"/g"
)

# Compiled regexes for use in functions.
JS_RE_ASCII_WORDS = JSRegExp(RS_ASCII_WORDS)
JS_RE_UNICODE_WORDS = JSRegExp(RS_UNICODE_WORDS)
JS_RE_LATIN1 = JSRegExp(RS_LATIN1)
RE_HAS_UNICODE_WORD = re.compile(RS_HAS_UNICODE_WORD)
RE_APOS = re.compile(RS_APOS)
RE_HTML_TAGS = re.compile(r"<\/?[^>]+>")
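
# Illustrative sketch of what these compiled patterns produce (assumed values, derived
# from the range definitions above): JS_RE_ASCII_WORDS grabs maximal runs of characters
# outside the ASCII control, whitespace, and punctuation ranges, while
# RE_HAS_UNICODE_WORD flags text (e.g. camelCase or non-ASCII letters) that needs the
# heavier unicode pattern.
#
#     >>> JS_RE_ASCII_WORDS.find('foo-bar_baz 42')
#     ['foo', 'bar', 'baz', '42']
#     >>> bool(RE_HAS_UNICODE_WORD.search('fooBar'))
#     True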


def camel_case(text):
    """
    Converts `text` to camel case.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to camel case.

    Example:

        >>> camel_case('FOO BAR_bAz')
        'fooBarBAz'

    .. versionadded:: 1.1.0

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    text = "".join(word.title() for word in compounder(text))
    return text[:1].lower() + text[1:]


def capitalize(text, strict=True):
    """
    Capitalizes the first character of `text`.

    Args:
        text (str): String to capitalize.
        strict (bool, optional): Whether to cast rest of string to lower case. Defaults to
            ``True``.

    Returns:
        str: Capitalized string.

    Example:

        >>> capitalize('once upon a TIME')
        'Once upon a time'
        >>> capitalize('once upon a TIME', False)
        'Once upon a TIME'

    .. versionadded:: 1.1.0

    .. versionchanged:: 3.0.0
        Added `strict` option.
    """
    text = pyd.to_string(text)
    return text.capitalize() if strict else text[:1].upper() + text[1:]


def chars(text):
    """
    Split `text` into a list of single characters.

    Args:
        text (str): String to split up.

    Returns:
        list: List of individual characters.

    Example:

        >>> chars('onetwo')
        ['o', 'n', 'e', 't', 'w', 'o']

    .. versionadded:: 3.0.0
    """
    return list(pyd.to_string(text))

def chop(text, step):
    """
    Break up `text` into intervals of length `step`.

    Args:
        text (str): String to chop.
        step (int): Interval to chop `text`.

    Returns:
        list: List of string chunks. If `text` is `None`, an empty list is returned.

    Example:

        >>> chop('abcdefg', 3)
        ['abc', 'def', 'g']

    .. versionadded:: 3.0.0
    """
    if text is None:
        return []

    text = pyd.to_string(text)

    if step <= 0:
        chopped = [text]
    else:
        chopped = [text[i : i + step] for i in range(0, len(text), step)]

    return chopped


def chop_right(text, step):
    """
    Like :func:`chop` except `text` is chopped from right.

    Args:
        text (str): String to chop.
        step (int): Interval to chop `text`.

    Returns:
        list: List of string chunks.

    Example:

        >>> chop_right('abcdefg', 3)
        ['a', 'bcd', 'efg']

    .. versionadded:: 3.0.0
    """
    if text is None:
        return []

    text = pyd.to_string(text)

    if step <= 0:
        chopped = [text]
    else:
        text_len = len(text)
        chopped = [text[-(i + step) : text_len - i] for i in range(0, text_len, step)][::-1]

    return chopped


def clean(text):
    """
    Trim and replace multiple spaces with a single space.

    Args:
        text (str): String to clean.

    Returns:
        str: Cleaned string.

    Example:

        >>> clean('a  b   c    d')
        'a b c d'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return " ".join(pyd.compact(text.split()))

def count_substr(text, subtext):
    """
    Count the occurrences of `subtext` in `text`.

    Args:
        text (str): Source string to count from.
        subtext (str): String to count.

    Returns:
        int: Number of occurrences of `subtext` in `text`.

    Example:

        >>> count_substr('aabbccddaabbccdd', 'bc')
        2

    .. versionadded:: 3.0.0
    """
    if text is None or subtext is None:
        return 0

    text = pyd.to_string(text)
    subtext = pyd.to_string(subtext)

    return text.count(subtext)

def deburr(text):
    """
    Deburrs `text` by converting latin-1 supplementary letters to basic latin letters.

    Args:
        text (str): String to deburr.

    Returns:
        str: Deburred string.

    Example:

        >>> deburr('déjà vu') == 'deja vu'
        True

    .. versionadded:: 2.0.0
    """
    text = pyd.to_string(text)
    return JS_RE_LATIN1.replace(
        text, lambda match: DEBURRED_LETTERS.get(match.group(), match.group())
    )

def decapitalize(text):
    """
    Decapitalizes the first character of `text`.

    Args:
        text (str): String to decapitalize.

    Returns:
        str: Decapitalized string.

    Example:

        >>> decapitalize('FOO BAR')
        'fOO BAR'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return text[:1].lower() + text[1:]

def ends_with(text, target, position=None):
    """
    Checks if `text` ends with a given target string.

    Args:
        text (str): String to check.
        target (str): String to check for.
        position (int, optional): Position to search from. Defaults to end of `text`.

    Returns:
        bool: Whether `text` ends with `target`.

    Example:

        >>> ends_with('abc def', 'def')
        True
        >>> ends_with('abc def', 4)
        False

    .. versionadded:: 1.1.0
    """
    target = pyd.to_string(target)
    text = pyd.to_string(text)

    if position is None:
        position = len(text)

    return text[:position].endswith(target)


def ensure_ends_with(text, suffix):
    """
    Append a given suffix to a string, but only if the source string does not end with that
    suffix.

    Args:
        text (str): Source string to append `suffix` to.
        suffix (str): String to append to the source string if the source string does not end
            with `suffix`.

    Returns:
        str: source string possibly extended by `suffix`.

    Example:

        >>> ensure_ends_with('foo bar', '!')
        'foo bar!'
        >>> ensure_ends_with('foo bar!', '!')
        'foo bar!'

    .. versionadded:: 2.4.0
    """
    text = pyd.to_string(text)
    suffix = pyd.to_string(suffix)
    if text.endswith(suffix):
        return text
    return f"{text}{suffix}"


def ensure_starts_with(text, prefix):
    """
    Prepend a given prefix to a string, but only if the source string does not start with that
    prefix.

    Args:
        text (str): Source string to prepend `prefix` to.
        prefix (str): String to prepend to the source string if the source string does not start
            with `prefix`.

    Returns:
        str: source string possibly prefixed by `prefix`.

    Example:

        >>> ensure_starts_with('foo bar', 'Oh my! ')
        'Oh my! foo bar'
        >>> ensure_starts_with('Oh my! foo bar', 'Oh my! ')
        'Oh my! foo bar'

    .. versionadded:: 2.4.0
    """
    text = pyd.to_string(text)
    prefix = pyd.to_string(prefix)
    if text.startswith(prefix):
        return text
    return f"{prefix}{text}"


def escape(text):
    r"""
    Converts the characters ``&``, ``<``, ``>``, ``"``, ``'``, and ``\``` in `text` to their
    corresponding HTML entities.

    Args:
        text (str): String to escape.

    Returns:
        str: HTML escaped string.

    Example:

        >>> escape('"1 > 2 && 3 < 4"')
        '&quot;1 &gt; 2 &amp;&amp; 3 &lt; 4&quot;'

    .. versionadded:: 1.0.0

    .. versionchanged:: 1.1.0
        Moved function to :mod:`pydash.strings`.
    """
    text = pyd.to_string(text)
    # NOTE: Not using html.escape because Lo-Dash escapes certain chars differently (e.g. "'"
    # isn't escaped by html.escape() but is by Lo-Dash).
    return "".join(HTML_ESCAPES.get(char, char) for char in text)


def escape_reg_exp(text):
    """
    Escapes the RegExp special characters in `text`.

    Args:
        text (str): String to escape.

    Returns:
        str: RegExp escaped string.

    Example:

        >>> escape_reg_exp('[()]')
        '\\\\[\\\\(\\\\)\\\\]'

    .. versionadded:: 1.1.0

    .. versionchanged:: 4.0.0
        Removed alias ``escape_re``.
    """
    text = pyd.to_string(text)
    return re.escape(text)


def has_substr(text, subtext):
    """
    Returns whether `subtext` is included in `text`.

    Args:
        text (str): String to search.
        subtext (str): String to search for.

    Returns:
        bool: Whether `subtext` is found in `text`.

    Example:

        >>> has_substr('abcdef', 'bc')
        True
        >>> has_substr('abcdef', 'bb')
        False

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    subtext = pyd.to_string(subtext)
    return text.find(subtext) >= 0


def human_case(text):
    """
    Converts `text` to human case which has only the first letter capitalized and each word
    separated by a space.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to human case.

    Example:

        >>> human_case('abc-def_hij lmn')
        'Abc def hij lmn'
        >>> human_case('user_id')
        'User'

    .. versionadded:: 3.0.0

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    return (
        pyd.chain(text)
        .snake_case()
        .reg_exp_replace("_id$", "")
        .replace("_", " ")
        .capitalize()
        .value()
    )


def insert_substr(text, index, subtext):
    """
    Insert `subtext` in `text` starting at position `index`.

    Args:
        text (str): String to add substring to.
        index (int): String index to insert into.
        subtext (str): String to insert.

    Returns:
        str: Modified string.

    Example:

        >>> insert_substr('abcdef', 3, '--')
        'abc--def'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    subtext = pyd.to_string(subtext)
    return text[:index] + subtext + text[index:]

def join(array, separator=""):
    """
    Joins an iterable into a string using `separator` between each element.

    Args:
        array (iterable): Iterable to implode.
        separator (str, optional): Separator to use when joining. Defaults to ``''``.

    Returns:
        str: Joined string.

    Example:

        >>> join(['a', 'b', 'c']) == 'abc'
        True
        >>> join([1, 2, 3, 4], '&') == '1&2&3&4'
        True
        >>> join('abcdef', '-') == 'a-b-c-d-e-f'
        True

    .. versionadded:: 2.0.0

    .. versionchanged:: 4.0.0
        Removed alias ``implode``.
    """
    return pyd.to_string(separator).join(pyd.map_(array or (), pyd.to_string))

def kebab_case(text):
    """
    Converts `text` to kebab case (a.k.a. spinal case).

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to kebab case.

    Example:

        >>> kebab_case('a b c_d-e!f')
        'a-b-c-d-e-f'

    .. versionadded:: 1.1.0

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    return "-".join(word.lower() for word in compounder(text) if word)


def lines(text):
    r"""
    Split lines in `text` into an array.

    Args:
        text (str): String to split.

    Returns:
        list: String split by lines.

    Example:

        >>> lines('a\nb\r\nc')
        ['a', 'b', 'c']

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return text.splitlines()


def lower_case(text):
    """
    Converts string to lower case as space separated words.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to lower case as space separated words.

    Example:

        >>> lower_case('fooBar')
        'foo bar'
        >>> lower_case('--foo-Bar--')
        'foo bar'
        >>> lower_case('/?*Foo10/;"B*Ar')
        'foo 10 b ar'

    .. versionadded:: 4.0.0

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    return " ".join(compounder(text)).lower()


def lower_first(text):
    """
    Converts the first character of string to lower case.

    Args:
        text (str): String passed in by the user.

    Returns:
        str: String in which the first character is converted to lower case.

    Example:

        >>> lower_first('FRED')
        'fRED'
        >>> lower_first('Foo Bar')
        'foo Bar'
        >>> lower_first('1foobar')
        '1foobar'
        >>> lower_first(';foobar')
        ';foobar'

    .. versionadded:: 4.0.0
    """
    return text[:1].lower() + text[1:]


def number_format(number, scale=0, decimal_separator=".", order_separator=","):
    """
    Format a number to scale with custom decimal and order separators.

    Args:
        number (int|float): Number to format.
        scale (int, optional): Number of decimals to include. Defaults to ``0``.
        decimal_separator (str, optional): Decimal separator to use. Defaults to ``'.'``.
        order_separator (str, optional): Order separator to use. Defaults to ``','``.

    Returns:
        str: Formatted number as string.

    Example:

        >>> number_format(1234.5678)
        '1,235'
        >>> number_format(1234.5678, 2, ',', '.')
        '1.234,57'

    .. versionadded:: 3.0.0
    """
    # Create a string formatter which converts number to the appropriately scaled representation.
    fmt = f"{{0:.{scale:d}f}}"

    try:
        num_parts = fmt.format(number).split(".")
    except ValueError:
        text = ""
    else:
        int_part = num_parts[0]
        dec_part = (num_parts + [""])[1]

        # Reverse the integer part, chop it into groups of 3, join on `order_separator`, and then
        # un-reverse the string.
        int_part = order_separator.join(chop(int_part[::-1], 3))[::-1]
        text = decimal_separator.join(pyd.compact([int_part, dec_part]))

    return text


def pad(text, length, chars=" "):
    """
    Pads `text` on the left and right sides if it is shorter than the given padding length. The
    `chars` string may be truncated if the number of padding characters can't be evenly divided by
    the padding length.

    Args:
        text (str): String to pad.
        length (int): Amount to pad.
        chars (str, optional): Characters to pad with. Defaults to ``" "``.

    Returns:
        str: Padded string.

    Example:

        >>> pad('abc', 5)
        ' abc '
        >>> pad('abc', 6, 'x')
        'xabcxx'
        >>> pad('abc', 5, '...')
        '.abc.'

    .. versionadded:: 1.1.0

    .. versionchanged:: 3.0.0
        Fix handling of multiple `chars` so that padded string isn't over padded.
    """
    # pylint: disable=redefined-outer-name
    text = pyd.to_string(text)
    text_len = len(text)

    if text_len >= length:
        return text

    mid = (length - text_len) / 2.0
    left_len = int(math.floor(mid))
    right_len = int(math.ceil(mid))
    chars = pad_end("", right_len, chars)

    return chars[:left_len] + text + chars

def pad_end(text, length, chars=" "):
    """
    Pads `text` on the right side if it is shorter than the given padding length. The `chars`
    string may be truncated if the number of padding characters can't be evenly divided by the
    padding length.

    Args:
        text (str): String to pad.
        length (int): Amount to pad.
        chars (str, optional): Characters to pad with. Defaults to ``" "``.

    Returns:
        str: Padded string.

    Example:

        >>> pad_end('abc', 5)
        'abc  '
        >>> pad_end('abc', 5, '.')
        'abc..'

    .. versionadded:: 1.1.0

    .. versionchanged:: 4.0.0
        Renamed from ``pad_right`` to ``pad_end``.
    """
    # pylint: disable=redefined-outer-name
    text = pyd.to_string(text)
    length = max((length, len(text)))
    return (text + repeat(chars, length))[:length]


def pad_start(text, length, chars=" "):
    """
    Pads `text` on the left side if it is shorter than the given padding length. The `chars`
    string may be truncated if the number of padding characters can't be evenly divided by the
    padding length.

    Args:
        text (str): String to pad.
        length (int): Amount to pad.
        chars (str, optional): Characters to pad with. Defaults to ``" "``.

    Returns:
        str: Padded string.

    Example:

        >>> pad_start('abc', 5)
        '  abc'
        >>> pad_start('abc', 5, '.')
        '..abc'

    .. versionadded:: 1.1.0

    .. versionchanged:: 4.0.0
        Renamed from ``pad_left`` to ``pad_start``.
    """
    # pylint: disable=redefined-outer-name
    text = pyd.to_string(text)
    length = max(length, len(text))
    return (repeat(chars, length) + text)[-length:]

def pascal_case(text, strict=True):
    """
    Like :func:`camel_case` except the first letter is capitalized.

    Args:
        text (str): String to convert.
        strict (bool, optional): Whether to cast rest of string to lower case. Defaults to
            ``True``.

    Returns:
        str: String converted to pascal case.

    Example:

        >>> pascal_case('FOO BAR_bAz')
        'FooBarBaz'
        >>> pascal_case('FOO BAR_bAz', False)
        'FooBarBAz'

    .. versionadded:: 3.0.0

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    text = pyd.to_string(text)

    if strict:
        text = text.lower()

    return capitalize(camel_case(text), strict=False)

def predecessor(char):
    """
    Return the predecessor character of `char`.

    Args:
        char (str): Character to find the predecessor of.

    Returns:
        str: Predecessor character.

    Example:

        >>> predecessor('c')
        'b'
        >>> predecessor('C')
        'B'
        >>> predecessor('3')
        '2'

    .. versionadded:: 3.0.0
    """
    char = pyd.to_string(char)
    return chr(ord(char) - 1)

def prune(text, length=0, omission="..."):
    """
    Like :func:`truncate` except it ensures that the pruned string doesn't exceed the original
    length, i.e., it avoids half-chopped words when truncating. If the pruned text + `omission`
    text is longer than the original text, then the original text is returned.

    Args:
        text (str): String to prune.
        length (int, optional): Target prune length. Defaults to ``0``.
        omission (str, optional): Omission text to append to the end of the pruned string.
            Defaults to ``'...'``.

    Returns:
        str: Pruned string.

    Example:

        >>> prune('Fe fi fo fum', 5)
        'Fe fi...'
        >>> prune('Fe fi fo fum', 6)
        'Fe fi...'
        >>> prune('Fe fi fo fum', 7)
        'Fe fi...'
        >>> prune('Fe fi fo fum', 8, ',,,')
        'Fe fi fo,,,'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    text_len = len(text)
    omission_len = len(omission)

    if text_len <= length:
        return text

    # Replace non-alphanumeric chars with whitespace.
    def repl(match):
        char = match.group(0)
        return " " if char.upper() == char.lower() else char

    subtext = reg_exp_replace(text[: length + 1], r".(?=\W*\w*$)", repl)

    if re.match(r"\w\w", subtext[-2:]):
        # Last two characters are alphanumeric. Remove last "word" from end of string so that we
        # prune to the next whole word.
        subtext = reg_exp_replace(subtext, r"\s*\S+$", "")
    else:
        # Last character (at least) is whitespace. So remove that character as well as any other
        # whitespace.
        subtext = subtext[:-1].rstrip()

    subtext_len = len(subtext)

    # Only add omission text if doing so will result in a string that is equal to or smaller in
    # length than the original.
    if (subtext_len + omission_len) <= text_len:
        text = text[:subtext_len] + omission

    return text

def quote(text, quote_char='"'):
    """
    Quote a string with another string.

    Args:
        text (str): String to be quoted.
        quote_char (str, optional): the quote character. Defaults to ``"``.

    Returns:
        str: the quoted string.

    Example:

        >>> quote('To be or not to be')
        '"To be or not to be"'
        >>> quote('To be or not to be', "'")
        "'To be or not to be'"

    .. versionadded:: 2.4.0
    """
    return surround(text, quote_char)


def reg_exp_js_match(text, reg_exp):
    """
    Return list of matches using Javascript style regular expression.

    Args:
        text (str): String to evaluate.
        reg_exp (str): Javascript style regular expression.

    Returns:
        list: List of matches.

    Example:

        >>> reg_exp_js_match('aaBBcc', '/bb/')
        []
        >>> reg_exp_js_match('aaBBcc', '/bb/i')
        ['BB']
        >>> reg_exp_js_match('aaBBccbb', '/bb/i')
        ['BB']
        >>> reg_exp_js_match('aaBBccbb', '/bb/gi')
        ['BB', 'bb']

    .. versionadded:: 2.0.0

    .. versionchanged:: 3.0.0
        Reordered arguments to make `text` first.

    .. versionchanged:: 4.0.0
        Renamed from ``js_match`` to ``reg_exp_js_match``.
    """
    text = pyd.to_string(text)
    return JSRegExp(reg_exp).find(text)


def reg_exp_js_replace(text, reg_exp, repl):
    """
    Replace `text` with `repl` using Javascript style regular expression to find matches.

    Args:
        text (str): String to evaluate.
        reg_exp (str): Javascript style regular expression.
        repl (str|callable): Replacement string or callable.

    Returns:
        str: Modified string.

    Example:

        >>> reg_exp_js_replace('aaBBcc', '/bb/', 'X')
        'aaBBcc'
        >>> reg_exp_js_replace('aaBBcc', '/bb/i', 'X')
        'aaXcc'
        >>> reg_exp_js_replace('aaBBccbb', '/bb/i', 'X')
        'aaXccbb'
        >>> reg_exp_js_replace('aaBBccbb', '/bb/gi', 'X')
        'aaXccX'

    .. versionadded:: 2.0.0

    .. versionchanged:: 3.0.0
        Reordered arguments to make `text` first.

    .. versionchanged:: 4.0.0
        Renamed from ``js_replace`` to ``reg_exp_js_replace``.
    """
    text = pyd.to_string(text)
    if not pyd.is_function(repl):
        repl = pyd.to_string(repl)
    return JSRegExp(reg_exp).replace(text, repl)


def reg_exp_replace(text, pattern, repl, ignore_case=False, count=0):
    """
    Replace occurrences of regex `pattern` with `repl` in `text`. Optionally, ignore case when
    replacing. Optionally, set `count` to limit number of replacements.

    Args:
        text (str): String to replace.
        pattern (str|typing.Pattern): Pattern to find and replace.
        repl (str): String to substitute `pattern` with.
        ignore_case (bool, optional): Whether to ignore case when replacing. Defaults to
            ``False``.
        count (int, optional): Maximum number of occurrences to replace. Defaults to ``0`` which
            replaces all.

    Returns:
        str: Replaced string.

    Example:

        >>> reg_exp_replace('aabbcc', 'b', 'X')
        'aaXXcc'
        >>> reg_exp_replace('aabbcc', 'B', 'X', ignore_case=True)
        'aaXXcc'
        >>> reg_exp_replace('aabbcc', 'b', 'X', count=1)
        'aaXbcc'
        >>> reg_exp_replace('aabbcc', '[ab]', 'X')
        'XXXXcc'

    .. versionadded:: 3.0.0

    .. versionchanged:: 4.0.0
        Renamed from ``re_replace`` to ``reg_exp_replace``.
    """
    if pattern is None:
        return pyd.to_string(text)

    return replace(text, pattern, repl, ignore_case=ignore_case, count=count, escape=False)


def repeat(text, n=0):
    """
    Repeats the given string `n` times.

    Args:
        text (str): String to repeat.
        n (int, optional): Number of times to repeat the string.

    Returns:
        str: Repeated string.

    Example:

        >>> repeat('.', 5)
        '.....'

    .. versionadded:: 1.1.0
    """
    return pyd.to_string(text) * int(n)


def replace(
    text, pattern, repl, ignore_case=False, count=0, escape=True, from_start=False, from_end=False
):
    """
    Replace occurrences of `pattern` with `repl` in `text`. Optionally, ignore case when
    replacing. Optionally, set `count` to limit number of replacements.

    Args:
        text (str): String to replace.
        pattern (str|typing.Pattern): Pattern to find and replace.
        repl (str): String to substitute `pattern` with.
        ignore_case (bool, optional): Whether to ignore case when replacing. Defaults to
            ``False``.
        count (int, optional): Maximum number of occurrences to replace. Defaults to ``0`` which
            replaces all.
        escape (bool, optional): Whether to escape `pattern` when searching. This is needed if a
            literal replacement is desired when `pattern` may contain special regular expression
            characters. Defaults to ``True``.
        from_start (bool, optional): Whether to limit replacement to start of string.
        from_end (bool, optional): Whether to limit replacement to end of string.

    Returns:
        str: Replaced string.

    Example:

        >>> replace('aabbcc', 'b', 'X')
        'aaXXcc'
        >>> replace('aabbcc', 'B', 'X', ignore_case=True)
        'aaXXcc'
        >>> replace('aabbcc', 'b', 'X', count=1)
        'aaXbcc'
        >>> replace('aabbcc', '[ab]', 'X')
        'aabbcc'
        >>> replace('aabbcc', '[ab]', 'X', escape=False)
        'XXXXcc'

    .. versionadded:: 3.0.0

    .. versionchanged:: 4.1.0
        Added ``from_start`` and ``from_end`` arguments.

    .. versionchanged:: 5.0.0
        Added support for ``pattern`` as ``typing.Pattern`` object.
    """
    text = pyd.to_string(text)

    if pattern is None:
        return text

    if not pyd.is_function(repl):
        repl = pyd.to_string(repl)

    flags = re.IGNORECASE if ignore_case else 0

    if isinstance(pattern, typing.Pattern):
        pat = pattern
    else:
        pattern = pyd.to_string(pattern)

        if escape:
            pattern = re.escape(pattern)

        if from_start and not pattern.startswith("^"):
            pattern = "^" + pattern

        if from_end and not pattern.endswith("$"):
            pattern += "$"

        pat = re.compile(pattern, flags=flags)

    return pat.sub(repl, text, count=count)

def replace_end(text, pattern, repl, ignore_case=False, escape=True):
    """
    Like :func:`replace` except it only replaces `text` with `repl` if `pattern` matches the end
    of `text`.

    Args:
        text (str): String to replace.
        pattern (str|typing.Pattern): Pattern to find and replace.
        repl (str): String to substitute `pattern` with.
        ignore_case (bool, optional): Whether to ignore case when replacing. Defaults to
            ``False``.
        escape (bool, optional): Whether to escape `pattern` when searching. This is needed if a
            literal replacement is desired when `pattern` may contain special regular expression
            characters. Defaults to ``True``.

    Returns:
        str: Replaced string.

    Example:

        >>> replace_end('aabbcc', 'b', 'X')
        'aabbcc'
        >>> replace_end('aabbcc', 'c', 'X')
        'aabbcX'

    .. versionadded:: 4.1.0
    """
    return replace(text, pattern, repl, ignore_case=ignore_case, escape=escape, from_end=True)


def replace_start(text, pattern, repl, ignore_case=False, escape=True):
    """
    Like :func:`replace` except it only replaces `text` with `repl` if `pattern` matches the start
    of `text`.

    Args:
        text (str): String to replace.
        pattern (str|typing.Pattern): Pattern to find and replace.
        repl (str): String to substitute `pattern` with.
        ignore_case (bool, optional): Whether to ignore case when replacing. Defaults to
            ``False``.
        escape (bool, optional): Whether to escape `pattern` when searching. This is needed if a
            literal replacement is desired when `pattern` may contain special regular expression
            characters. Defaults to ``True``.

    Returns:
        str: Replaced string.

    Example:

        >>> replace_start('aabbcc', 'b', 'X')
        'aabbcc'
        >>> replace_start('aabbcc', 'a', 'X')
        'Xabbcc'

    .. versionadded:: 4.1.0
    """
    return replace(text, pattern, repl, ignore_case=ignore_case, escape=escape, from_start=True)

def separator_case(text, separator):
    """
    Splits `text` on words and joins with `separator`.

    Args:
        text (str): String to convert.
        separator (str): Separator to join words with.

    Returns:
        str: Converted string.

    Example:

        >>> separator_case('a!!b___c.d', '-')
        'a-b-c-d'

    .. versionadded:: 3.0.0

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    return separator.join(word.lower() for word in words(text) if word)


def series_phrase(items, separator=", ", last_separator=" and ", serial=False):
    """
    Join items into a grammatical series phrase, e.g., ``"item1, item2, item3 and item4"``.

    Args:
        items (list): List of string items to join.
        separator (str, optional): Item separator. Defaults to ``', '``.
        last_separator (str, optional): Last item separator. Defaults to ``' and '``.
        serial (bool, optional): Whether to include `separator` with `last_separator` when number
            of items is greater than 2. Defaults to ``False``.

    Returns:
        str: Joined string.

    Example:

        >>> series_phrase(['apples', 'bananas', 'peaches'])
        'apples, bananas and peaches'
        >>> series_phrase(['apples', 'bananas', 'peaches'], serial=True)
        'apples, bananas, and peaches'
        >>> series_phrase(['apples', 'bananas', 'peaches'], '; ', ', or ')
        'apples; bananas, or peaches'

    .. versionadded:: 3.0.0
    """
    items = pyd.chain(items).map(pyd.to_string).compact().value()
    item_count = len(items)

    separator = pyd.to_string(separator)
    last_separator = pyd.to_string(last_separator)

    if item_count > 2 and serial:
        last_separator = separator.rstrip() + last_separator

    if item_count >= 2:
        items = items[:-2] + [last_separator.join(items[-2:])]

    return separator.join(items)


def series_phrase_serial(items, separator=", ", last_separator=" and "):
    """
    Join items into a grammatical series phrase using a serial separator, e.g.,
    ``"item1, item2, item3, and item4"``.

    Args:
        items (list): List of string items to join.
        separator (str, optional): Item separator. Defaults to ``', '``.
        last_separator (str, optional): Last item separator. Defaults to ``' and '``.

    Returns:
        str: Joined string.

    Example:

        >>> series_phrase_serial(['apples', 'bananas', 'peaches'])
        'apples, bananas, and peaches'

    .. versionadded:: 3.0.0
    """
    return series_phrase(items, separator, last_separator, serial=True)

def slugify(text, separator="-"):
    """
    Convert `text` into an ASCII slug which can be used safely in URLs. Incoming `text` is
    converted to unicode and normalized using the ``NFKD`` form. This results in some accented
    characters being converted to their ASCII "equivalent" (e.g. ``é`` is converted to ``e``).
    Leading and trailing whitespace is trimmed and any remaining whitespace or other special
    characters without an ASCII equivalent are replaced with ``-``.

    Args:
        text (str): String to slugify.
        separator (str, optional): Separator to use. Defaults to ``'-'``.

    Returns:
        str: Slugified string.

    Example:

        >>> slugify('This is a slug.') == 'this-is-a-slug'
        True
        >>> slugify('This is a slug.', '+') == 'this+is+a+slug'
        True

    .. versionadded:: 3.0.0

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    normalized = (
        unicodedata.normalize("NFKD", pyd.to_string(text)).encode("ascii", "ignore").decode("utf8")
    )
    return separator_case(normalized, separator)

def snake_case(text):
    """
    Converts `text` to snake case.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to snake case.

    Example:

        >>> snake_case('This is Snake Case!')
        'this_is_snake_case'

    .. versionadded:: 1.1.0

    .. versionchanged:: 4.0.0
        Removed alias ``underscore_case``.

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    return "_".join(word.lower() for word in compounder(text) if word)

def split(text, separator=UNSET):
    """
    Splits `text` on `separator`. If `separator` not provided, then `text` is split on whitespace.
    If `separator` is falsey, then `text` is split on every character.

    Args:
        text (str): String to explode.
        separator (str, optional): Separator string to split on. Defaults to ``UNSET``.

    Returns:
        list: Split string.

    Example:

        >>> split('one potato, two potatoes, three potatoes, four!')
        ['one', 'potato,', 'two', 'potatoes,', 'three', 'potatoes,', 'four!']
        >>> split('one potato, two potatoes, three potatoes, four!', ',')
        ['one potato', ' two potatoes', ' three potatoes', ' four!']

    .. versionadded:: 2.0.0

    .. versionchanged:: 3.0.0
        Changed `separator` default to ``NoValue`` and supported splitting on whitespace by
        default.

    .. versionchanged:: 4.0.0
        Removed alias ``explode``.
    """
    text = pyd.to_string(text)

    if separator is UNSET:
        ret = text.split()
    elif separator:
        ret = text.split(separator)
    else:
        ret = chars(text)

    return ret

def start_case(text):
    """
    Convert `text` to start case.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to start case.

    Example:

        >>> start_case("fooBar")
        'Foo Bar'

    .. versionadded:: 3.1.0

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    return " ".join(capitalize(word, strict=False) for word in compounder(text))


def starts_with(text, target, position=0):
    """
    Checks if `text` starts with a given target string.

    Args:
        text (str): String to check.
        target (str): String to check for.
        position (int, optional): Position to search from. Defaults to beginning of `text`.

    Returns:
        bool: Whether `text` starts with `target`.

    Example:

        >>> starts_with('abcdef', 'a')
        True
        >>> starts_with('abcdef', 'b')
        False
        >>> starts_with('abcdef', 'a', 1)
        False

    .. versionadded:: 1.1.0
    """
    text = pyd.to_string(text)
    target = pyd.to_string(target)
    return text[position:].startswith(target)


def strip_tags(text):
    """
    Removes all HTML tags from `text`.

    Args:
        text (str): String to strip.

    Returns:
        str: String without HTML tags.

    Example:

        >>> strip_tags('<a href="#">Some link</a>')
        'Some link'

    .. versionadded:: 3.0.0
    """
    return RE_HTML_TAGS.sub("", pyd.to_string(text))

def substr_left(text, subtext):
    """
    Searches `text` from left-to-right for `subtext` and returns a substring consisting of the
    characters in `text` that are to the left of `subtext`, or the whole string if no match is
    found.

    Args:
        text (str): String to partition.
        subtext (str): String to search for.

    Returns:
        str: Substring to left of `subtext`.

    Example:

        >>> substr_left('abcdefcdg', 'cd')
        'ab'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return text.partition(subtext)[0] if subtext else text


def substr_left_end(text, subtext):
    """
    Searches `text` from right-to-left for `subtext` and returns a substring consisting of the
    characters in `text` that are to the left of `subtext`, or the whole string if no match is
    found.

    Args:
        text (str): String to partition.
        subtext (str): String to search for.

    Returns:
        str: Substring to left of `subtext`.

    Example:

        >>> substr_left_end('abcdefcdg', 'cd')
        'abcdef'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return text.rpartition(subtext)[0] or text if subtext else text


def substr_right(text, subtext):
    """
    Searches `text` from left-to-right for `subtext` and returns a substring consisting of the
    characters in `text` that are to the right of `subtext`, or the whole string if no match is
    found.

    Args:
        text (str): String to partition.
        subtext (str): String to search for.

    Returns:
        str: Substring to right of `subtext`.

    Example:

        >>> substr_right('abcdefcdg', 'cd')
        'efcdg'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return text.partition(subtext)[2] or text if subtext else text


def substr_right_end(text, subtext):
    """
    Searches `text` from right-to-left for `subtext` and returns a substring consisting of the
    characters in `text` that are to the right of `subtext`, or the whole string if no match is
    found.

    Args:
        text (str): String to partition.
        subtext (str): String to search for.

    Returns:
        str: Substring to right of `subtext`.

    Example:

        >>> substr_right_end('abcdefcdg', 'cd')
        'g'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return text.rpartition(subtext)[2] if subtext else text

def successor(char):
    """
    Return the successor character of `char`.

    Args:
        char (str): Character to find the successor of.

    Returns:
        str: Successor character.

    Example:

        >>> successor('b')
        'c'
        >>> successor('B')
        'C'
        >>> successor('2')
        '3'

    .. versionadded:: 3.0.0
    """
    char = pyd.to_string(char)
    return chr(ord(char) + 1)


def surround(text, wrapper):
    """
    Surround a string with another string.

    Args:
        text (str): String to surround with `wrapper`.
        wrapper (str): String by which `text` is to be surrounded.

    Returns:
        str: Surrounded string.

    Example:

        >>> surround('abc', '"')
        '"abc"'
        >>> surround('abc', '!')
        '!abc!'

    .. versionadded:: 2.4.0
    """
    text = pyd.to_string(text)
    wrapper = pyd.to_string(wrapper)
    return f"{wrapper}{text}{wrapper}"


def swap_case(text):
    """
    Swap case of `text` characters.

    Args:
        text (str): String to swap case.

    Returns:
        str: String with swapped case.

    Example:

        >>> swap_case('aBcDeF')
        'AbCdEf'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    return text.swapcase()


def title_case(text):
    """
    Convert `text` to title case.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to title case.

    Example:

        >>> title_case("bob's shop")
        "Bob's Shop"

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    # NOTE: Can't use text.title() since it doesn't handle apostrophes.
    return " ".join(word.capitalize() for word in re.split(" ", text))


def to_lower(text):
    """
    Converts the given :attr:`text` to lower text.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to lower case.

    Example:

        >>> to_lower('--Foo-Bar--')
        '--foo-bar--'
        >>> to_lower('fooBar')
        'foobar'
        >>> to_lower('__FOO_BAR__')
        '__foo_bar__'

    .. versionadded:: 4.0.0
    """
    return pyd.to_string(text).lower()


def to_upper(text):
    """
    Converts the given :attr:`text` to upper text.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to upper case.

    Example:

        >>> to_upper('--Foo-Bar--')
        '--FOO-BAR--'
        >>> to_upper('fooBar')
        'FOOBAR'
        >>> to_upper('__FOO_BAR__')
        '__FOO_BAR__'

    .. versionadded:: 4.0.0
    """
    return pyd.to_string(text).upper()


def trim(text, chars=None):
    r"""
    Removes leading and trailing whitespace or specified characters from `text`.

    Args:
        text (str): String to trim.
        chars (str, optional): Specific characters to remove.

    Returns:
        str: Trimmed string.

    Example:

        >>> trim(' abc efg\r\n ')
        'abc efg'

    .. versionadded:: 1.1.0
    """
    # pylint: disable=redefined-outer-name
    text = pyd.to_string(text)
    return text.strip(chars)


def trim_end(text, chars=None):
    r"""
    Removes trailing whitespace or specified characters from `text`.

    Args:
        text (str): String to trim.
        chars (str, optional): Specific characters to remove.

    Returns:
        str: Trimmed string.

    Example:

        >>> trim_end(' abc efg\r\n ')
        ' abc efg'

    .. versionadded:: 1.1.0

    .. versionchanged:: 4.0.0
        Renamed from ``trim_right`` to ``trim_end``.
    """
    text = pyd.to_string(text)
    return text.rstrip(chars)


def trim_start(text, chars=None):
    r"""
    Removes leading whitespace or specified characters from `text`.

    Args:
        text (str): String to trim.
        chars (str, optional): Specific characters to remove.

    Returns:
        str: Trimmed string.

    Example:

        >>> trim_start(' abc efg\r\n ')
        'abc efg\r\n '

    .. versionadded:: 1.1.0

    .. versionchanged:: 4.0.0
        Renamed from ``trim_left`` to ``trim_start``.
    """
    text = pyd.to_string(text)
    return text.lstrip(chars)


def truncate(text, length=30, omission="...", separator=None):
    """
    Truncates `text` if it is longer than the given maximum string length. The last characters of
    the truncated string are replaced with the omission string which defaults to ``...``.

    Args:
        text (str): String to truncate.
        length (int, optional): Maximum string length. Defaults to ``30``.
        omission (str, optional): String to indicate text is omitted.
        separator (mixed, optional): Separator pattern to truncate to.

    Returns:
        str: Truncated string.

    Example:

        >>> truncate('hello world', 5)
        'he...'
        >>> truncate('hello world', 5, '..')
        'hel..'
        >>> truncate('hello world', 10)
        'hello w...'
        >>> truncate('hello world', 10, separator=' ')
        'hello...'

    .. versionadded:: 1.1.0

    .. versionchanged:: 4.0.0
        Removed alias ``trunc``.
    """
    text = pyd.to_string(text)

    if len(text) <= length:
        return text

    omission_len = len(omission)
    text_len = length - omission_len
    text = text[:text_len]

    trunc_len = len(text)

    if pyd.is_string(separator):
        trunc_len = text.rfind(separator)
    elif pyd.is_reg_exp(separator):
        last = None
        for match in separator.finditer(text):
            last = match

        if last is not None:
            trunc_len = last.start()

    return text[:trunc_len] + omission


def unescape(text):
    """
    The inverse of :func:`escape`. This method converts the HTML entities ``&amp;``, ``&lt;``,
    ``&gt;``, ``&quot;``, ``&#39;``, and ``&#96;`` in `text` to their corresponding characters.

    Args:
        text (str): String to unescape.

    Returns:
        str: HTML unescaped string.

    Example:

        >>> results = unescape('&quot;1 &gt; 2 &amp;&amp; 3 &lt; 4&quot;')
        >>> results == '"1 > 2 && 3 < 4"'
        True

    .. versionadded:: 1.0.0

    .. versionchanged:: 1.1.0
        Moved to :mod:`pydash.strings`.
    """
    text = pyd.to_string(text)
    return html.unescape(text)


def upper_case(text):
    """
    Converts string to upper case, as space separated words.

    Args:
        text (str): String to be converted to uppercase.

    Returns:
        str: String converted to uppercase, as space separated words.

    Example:

        >>> upper_case('--foo-bar--')
        'FOO BAR'
        >>> upper_case('fooBar')
        'FOO BAR'
        >>> upper_case('/?*Foo10/;"B*Ar')
        'FOO 10 B AR'

    .. versionadded:: 4.0.0

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    return " ".join(compounder(text)).upper()


def upper_first(text):
    """
    Converts the first character of string to upper case.

    Args:
        text (str): String passed in by the user.

    Returns:
        str: String in which the first character is converted to upper case.

    Example:

        >>> upper_first('fred')
        'Fred'
        >>> upper_first('foo bar')
        'Foo bar'
        >>> upper_first('1foobar')
        '1foobar'
        >>> upper_first(';foobar')
        ';foobar'

    .. versionadded:: 4.0.0
    """
    return text[:1].upper() + text[1:]


def unquote(text, quote_char='"'):
    """
    Unquote `text` by removing `quote_char` if `text` begins and ends with it.

    Args:
        text (str): String to unquote.
        quote_char (str, optional): Quote character to remove. Defaults to `"`.

    Returns:
        str: Unquoted string.

    Example:

        >>> unquote('"abc"')
        'abc'
        >>> unquote('"abc"', '#')
        '"abc"'
        >>> unquote('#abc', '#')
        '#abc'
        >>> unquote('#abc#', '#')
        'abc'

    .. versionadded:: 3.0.0
    """
    text = pyd.to_string(text)
    inner = text[1:-1]

    if text == f"{quote_char}{inner}{quote_char}":
        text = inner

    return text


def url(*paths, **params):
    """
    Combines a series of URL paths into a single URL. Optionally, pass in keyword arguments to
    append query parameters.

    Args:
        paths (str): URL paths to combine.

    Keyword Args:
        params (str, optional): Query parameters.

    Returns:
        str: URL string.

    Example:

        >>> link = url('a', 'b', ['c', 'd'], '/', q='X', y='Z')
        >>> path, params = link.split('?')
        >>> path == 'a/b/c/d/'
        True
        >>> set(params.split('&')) == set(['q=X', 'y=Z'])
        True

    .. versionadded:: 2.2.0
    """
    paths = pyd.chain(paths).flatten_deep().map(pyd.to_string).value()
    paths_list = []
    params_list = flatten_url_params(params)

    for path in paths:
        scheme, netloc, path, query, fragment = urlsplit(path)
        query = parse_qsl(query)
        params_list += query
        paths_list.append(urlunsplit((scheme, netloc, path, "", fragment)))

    path = delimitedpathjoin("/", *paths_list)
    scheme, netloc, path, query, fragment = urlsplit(path)
    query = urlencode(params_list)

    return urlunsplit((scheme, netloc, path, query, fragment))


def words(text, pattern=None):
    """
    Return list of words contained in `text`.

    References:
        https://github.com/lodash/lodash/blob/master/words.js#L30

    Args:
        text (str): String to split.
        pattern (str, optional): Custom pattern to split words on. Defaults to ``None``.

    Returns:
        list: List of words.

    Example:

        >>> words('a b, c; d-e')
        ['a', 'b', 'c', 'd', 'e']
        >>> words('fred, barney, & pebbles', '/[^, ]+/g')
        ['fred', 'barney', '&', 'pebbles']

    .. versionadded:: 2.0.0

    .. versionchanged:: 3.2.0
        Added `pattern` argument.

    .. versionchanged:: 3.2.0
        Improved matching for one character words.

    .. versionchanged:: 5.0.0
        Improved unicode word support.
    """
    text = pyd.to_string(text)

    if pattern is None:
        if has_unicode_word(text):
            reg_exp = JS_RE_UNICODE_WORDS
        else:
            reg_exp = JS_RE_ASCII_WORDS
    else:
        reg_exp = JSRegExp(pattern)

    return reg_exp.find(text)

#
# Utility functions not a part of main API
#


def compounder(text):
    """
    Remove single quote before passing into words() to match Lodash-style outputs.

    Required by certain functions such as kebab_case, camel_case, start_case etc.

    References:
        https://github.com/lodash/lodash/blob/4.17.15/lodash.js#L4968
    """
    return words(deburr(RE_APOS.sub("", pyd.to_string(text))))


def has_unicode_word(text):
    """
    Check if the text contains unicode or requires more complex regex to handle.

    References:
        https://github.com/lodash/lodash/blob/master/words.js#L3
    """
    result = RE_HAS_UNICODE_WORD.search(text)
    return bool(result)


def delimitedpathjoin(delimiter, *paths):
    """
    Join delimited path using specified delimiter.

    >>> assert delimitedpathjoin('.', '') == ''
    >>> assert delimitedpathjoin('.', '.') == '.'
    >>> assert delimitedpathjoin('.', ['', '.a']) == '.a'
    >>> assert delimitedpathjoin('.', ['a', '.']) == 'a.'
    >>> assert delimitedpathjoin('.', ['', '.a', '', '', 'b']) == '.a.b'
    >>> ret = '.a.b.c.d.e.'
    >>> assert delimitedpathjoin('.', ['.a.', 'b.', '.c', 'd', 'e.']) == ret
    >>> assert delimitedpathjoin('.', ['a', 'b', 'c']) == 'a.b.c'
    >>> ret = 'a.b.c.d.e.f'
    >>> assert delimitedpathjoin('.', ['a.b', '.c.d.', '.e.f']) == ret
    >>> ret = '.a.b.c.1.'
    >>> assert delimitedpathjoin('.', '.', 'a', 'b', 'c', 1, '.') == ret
    >>> assert delimitedpathjoin('.', []) == ''
    """
    paths = [pyd.to_string(path) for path in pyd.flatten_deep(paths) if path]

    if len(paths) == 1:
        # Special case where there's no need to join anything. Doing this because if
        # path==[delimiter], then an extra delimiter would be added if the else clause ran instead.
        path = paths[0]
    else:
        leading = delimiter if paths and paths[0].startswith(delimiter) else ""
        trailing = delimiter if paths and paths[-1].endswith(delimiter) else ""
        middle = delimiter.join([path.strip(delimiter) for path in paths if path.strip(delimiter)])
        path = "".join([leading, middle, trailing])

    return path


def flatten_url_params(params):
    """
    Flatten URL params into list of tuples.

    If any param value is a list or tuple, then map each value to the param key.

    >>> params = [('a', 1), ('a', [2, 3])]
    >>> assert flatten_url_params(params) == [('a', 1), ('a', 2), ('a', 3)]
    >>> params = {'a': [1, 2, 3]}
    >>> assert flatten_url_params(params) == [('a', 1), ('a', 2), ('a', 3)]
    """
    if isinstance(params, dict):
        params = list(params.items())

    flattened = []
    for param, value in params:
        if isinstance(value, (list, tuple)):
            flattened += zip([param] * len(value), value)
        else:
            flattened.append((param, value))

    return flattened
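
# Illustrative sketch (assumed values, based on the helpers above) of why compounder()
# strips apostrophes before word splitting: possessives such as "Bob's" compound into
# a single word, matching Lodash's case-conversion output.
#
#     >>> compounder("Bob's shop")
#     ['Bobs', 'shop']
#     >>> camel_case("Bob's shop")
#     'bobsShop'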