Source code for icalendar.parser.string

"""Functions for manipulating strings and bytes."""

import re

from icalendar.parser_tools import DEFAULT_ENCODING


def escape_char(text: str | bytes) -> str | bytes:
    r"""Format value according to iCalendar TEXT escaping rules.

    Escapes special characters in text values according to
    :rfc:`5545#section-3.3.11` rules. ``str`` input yields ``str`` and
    ``bytes`` input yields ``bytes``.

    Parameters:
        text: The text to escape.

    Returns:
        The escaped text, of the same type as ``text``.

    Note:
        The replacement order is critical to avoid double-escaping:

        1. literal ``\N`` -> newline character (so that it ends up as
           a lowercase ``\n`` in step 6 instead of being double-escaped)
        2. ``\`` -> ``\\`` (escape backslashes)
        3. ``;`` -> ``\;`` (escape semicolons)
        4. ``,`` -> ``\,`` (escape commas)
        5. CRLF -> literal ``\n`` (normalize line endings)
        6. newline character -> literal ``\n``
    """
    assert isinstance(text, (str, bytes))
    # NOTE: ORDER MATTERS!
    if isinstance(text, bytes):
        # bytes.replace() only accepts bytes arguments, so the str chain
        # below cannot be shared; this branch mirrors it step by step.
        return (
            text.replace(rb"\N", b"\n")
            .replace(b"\\", b"\\\\")
            .replace(b";", rb"\;")
            .replace(b",", rb"\,")
            .replace(b"\r\n", rb"\n")
            .replace(b"\n", rb"\n")
        )
    return (
        text.replace(r"\N", "\n")
        .replace("\\", "\\\\")
        .replace(";", r"\;")
        .replace(",", r"\,")
        .replace("\r\n", r"\n")
        .replace("\n", r"\n")
    )
# Every sequence recognised while unescaping, mapped to its replacement.
_UNESCAPE_STR = {
    "\r\n": "\n",
    "\\\\": "\\",
    "\\n": "\n",
    "\\N": "\n",
    "\\,": ",",
    "\\;": ";",
}
_UNESCAPE_BYTES = {
    key.encode("ascii"): value.encode("ascii")
    for key, value in _UNESCAPE_STR.items()
}
# CRLF, or a backslash followed by one of the escapable characters.
_UNESCAPE_STR_RE = re.compile(r"\r\n|\\[\\,;nN]")
_UNESCAPE_BYTES_RE = re.compile(rb"\r\n|\\[\\,;nN]")


def unescape_char(text: str | bytes) -> str | bytes | None:
    r"""Unescape iCalendar TEXT values.

    Reverses the escaping applied by :func:`escape_char` according to
    :rfc:`5545#section-3.3.11` TEXT escaping rules.

    Parameters:
        text: The escaped text.

    Returns:
        The unescaped text, of the same type as ``text``, or ``None`` if
        ``text`` is neither ``str`` nor ``bytes`` (only reachable when
        assertions are disabled).

    Note:
        The text is scanned once, left to right, and each of the following
        sequences is replaced where it is found:

        - CRLF -> newline (normalize line endings)
        - ``\n`` and ``\N`` -> newline
        - ``\,`` -> ``,``
        - ``\;`` -> ``;``
        - ``\\`` -> ``\``

        Scanning in a single pass matters: an escaped backslash followed by
        ``n`` (``\\n``) must decode to a backslash and the letter ``n``.
        Chained ``str.replace`` calls would instead consume the second
        backslash together with the ``n`` and produce a newline.
    """
    assert isinstance(text, (str, bytes))
    if isinstance(text, str):
        return _UNESCAPE_STR_RE.sub(
            lambda found: _UNESCAPE_STR[found.group(0)], text
        )
    if isinstance(text, bytes):
        return _UNESCAPE_BYTES_RE.sub(
            lambda found: _UNESCAPE_BYTES[found.group(0)], text
        )
    return None
def foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str:
    """Fold a content line as described in RFC 5545.

    Lines of text SHOULD NOT be longer than 75 octets, excluding the line
    break. A longer line is split by inserting ``fold_sep`` (by default a
    CRLF followed by a single space) between characters, so a multi-byte
    character is never cut in half.

    Parameters:
        line: The unfolded line; it must not contain a newline character.
        limit: The octet limit per line.
        fold_sep: The separator inserted at every fold.

    Returns:
        The folded line.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Fast path for the common case: in pure ASCII text one character is one
    # octet, so the line can simply be sliced into fixed-width pieces.
    if line.isascii():
        width = limit - 1
        pieces = [
            line[start : start + width] for start in range(0, len(line), width)
        ]
        return fold_sep.join(pieces)

    # General path: count encoded octets and fold in front of the character
    # that would make the running count reach the limit.
    folded: list[str] = []
    used = 0
    for ch in line:
        size = len(ch.encode(DEFAULT_ENCODING))
        if used + size >= limit:
            folded.append(fold_sep)
            used = size
        else:
            used += size
        folded.append(ch)
    return "".join(folded)
def escape_string(val: str) -> str:
    r"""Replace backslash escapes with percent-encoded hex values.

    Used during parameter parsing so that escaped characters survive
    processing; :func:`unescape_string` converts the percent codes back
    into characters.

    Parameters:
        val: The string with backslash escapes.

    Returns:
        The string with the backslash escapes percent-encoded.

    Note:
        Conversions, applied in this order:

        - ``\,`` -> ``%2C``
        - ``\:`` -> ``%3A``
        - ``\;`` -> ``%3B``
        - ``\\`` -> ``%5C``
    """
    # (escape sequence, percent code) pairs; the order is significant.
    conversions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    encoded = val
    for sequence, code in conversions:
        encoded = encoded.replace(sequence, code)
    return encoded
def unescape_string(val: str) -> str:
    r"""Turn percent-encoded hex values back into characters.

    Reverses :func:`escape_string`; used during parameter parsing.

    Parameters:
        val: The string with percent-encoded values.

    Returns:
        The string with the percent codes replaced by their characters.

    Note:
        Conversions, applied in this order:

        - ``%2C`` -> ``,``
        - ``%3A`` -> ``:``
        - ``%3B`` -> ``;``
        - ``%5C`` -> ``\``
    """
    # (percent code, character) pairs; the order is significant.
    conversions = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    decoded = val
    for code, character in conversions:
        decoded = decoded.replace(code, character)
    return decoded
# [\w-] because of the iCalendar RFC # . because of the vCard RFC NAME = re.compile(r"[\w.-]+")
def validate_token(name: str) -> None:
    r"""Check that a name is a valid iCalendar token.

    The whole name must match the ``NAME`` pattern (``[\w.-]+``), which
    follows the :rfc:`5545` token syntax.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is not a valid token.
    """
    # A single match spanning the entire string is the only accepted shape.
    if NAME.fullmatch(name) is None:
        raise ValueError(name)
# Names exported by ``from <this module> import *``.
__all__ = [
    "escape_char",
    "escape_string",
    "foldline",
    "unescape_char",
    "unescape_string",
    "validate_token",
]