# Source code for icalendar.parser.property

"""Tools for parsing properties."""

import re

from icalendar.parser.string import unescape_string



# [docs]
def unescape_list_or_string(val: str | list[str]) -> str | list[str]:
    """Unescape either a single string or every string in a list.

    A list is processed element by element with :func:`unescape_string` and a
    new list is returned. Any other value is handed to
    :func:`unescape_string` directly.

    Parameters:
        val: The string, or the list of strings, to unescape.

    Returns:
        The unescaped string, or a new list holding the unescaped strings.
    """
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))



# Matches one backslash escape: a backslash followed by a backslash, comma,
# semicolon, colon, ``n`` or ``N``. Group 1 captures the escaped character.
_unescape_backslash_regex = re.compile(r"\\([\\,;:nN])")


def _replace_backslash_escape(match: re.Match[str]) -> str:
    """Return the replacement text for one matched escape sequence."""
    escaped = match.group(1)
    # ``n`` and ``N`` both encode a line break; every other escaped
    # character stands for itself.
    return "\n" if escaped in "nN" else escaped


def unescape_backslash(val: str) -> str:
    r"""Unescape backslash sequences in iCalendar text.

    Unlike :py:meth:`unescape_string`, this only handles actual backslash escapes
    per :rfc:`5545`, not URL encoding. This preserves URL-encoded values
    like ``%3A`` in URLs.

    The sequences ``\\``, ``\,``, ``\;`` and ``\:`` are replaced by their
    second character, and ``\n`` / ``\N`` are replaced by a newline.
    A backslash followed by any other character, or a backslash at the very
    end of the text, is left unchanged.

    Processes backslash escape sequences in a single pass using regex matching,
    so the output of one replacement is never unescaped a second time: an
    escaped backslash followed by ``n`` yields a backslash and an ``n``,
    not a newline.

    Parameters:
        val: The text that may contain backslash escape sequences.

    Returns:
        The text with all recognized escape sequences replaced.
    """
    return _unescape_backslash_regex.sub(_replace_backslash_escape, val)




# [docs]
def _split_on_unescaped(text: str, separator: str) -> list[str]:
    """Split text on an unescaped separator and unescape each part.

    Shared implementation of :func:`split_on_unescaped_comma` and
    :func:`split_on_unescaped_semicolon`.

    Parameters:
        text: The text to split.
        separator: The single character to split on.

    Returns:
        The parts in order, each passed through :func:`unescape_backslash`.
        Empty ``text`` yields ``[""]``.
    """
    if not text:
        return [""]

    parts = []
    start = 0  # index at which the part currently being scanned begins
    i = 0
    length = len(text)

    while i < length:
        char = text[i]
        if char == "\\" and i + 1 < length:
            # Skip the backslash together with the character it escapes so
            # that an escaped separator is never treated as a split point.
            # Both characters stay in the part; they are unescaped later.
            i += 2
        elif char == separator:
            # Unescaped separator - close the current part.
            parts.append(unescape_backslash(text[start:i]))
            i += 1
            start = i
        else:
            i += 1

    # Whatever follows the last separator (possibly nothing) is the final part.
    parts.append(unescape_backslash(text[start:]))
    return parts


def split_on_unescaped_comma(text: str) -> list[str]:
    r"""Split text on unescaped commas and unescape each part.

    A comma is a split point unless it is escaped, that is, directly preceded
    by a backslash that is not itself escaped.
    After splitting, unescapes backslash sequences in each part.

    Parameters:
        text: Text with potential escaped commas (e.g., "foo\\, bar,baz")

    Returns:
        List of unescaped parts. Empty text yields a list holding one
        empty string.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_comma
            >>> split_on_unescaped_comma(r"foo\, bar,baz")
            ['foo, bar', 'baz']
            >>> split_on_unescaped_comma("a,b,c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_comma(r"a\,b\,c")
            ['a,b,c']
            >>> split_on_unescaped_comma(r"Work,Personal\,Urgent")
            ['Work', 'Personal,Urgent']
            >>> split_on_unescaped_comma(r"a\\,b")
            ['a\\', 'b']
    """
    return _split_on_unescaped(text, ",")


def split_on_unescaped_semicolon(text: str) -> list[str]:
    r"""Split text on unescaped semicolons and unescape each part.

    A semicolon is a split point unless it is escaped, that is, directly
    preceded by a backslash that is not itself escaped.
    After splitting, unescapes backslash sequences in each part.
    Used by vCard structured properties (ADR, N, ORG) per :rfc:`6350`.

    Parameters:
        text: Text with potential escaped semicolons (e.g., "field1\\;with;field2")

    Returns:
        List of unescaped field strings. Empty text yields a list holding
        one empty string.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_semicolon
            >>> split_on_unescaped_semicolon(r"field1\;with;field2")
            ['field1;with', 'field2']
            >>> split_on_unescaped_semicolon("a;b;c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_semicolon(r"a\;b\;c")
            ['a;b;c']
            >>> split_on_unescaped_semicolon(r"PO Box 123\;Suite 200;City")
            ['PO Box 123;Suite 200', 'City']
            >>> split_on_unescaped_semicolon(r"a\\;b")
            ['a\\', 'b']
    """
    return _split_on_unescaped(text, ";")



# Public API: the names exported by a star-import of this module.
__all__ = [
    "split_on_unescaped_comma",
    "split_on_unescaped_semicolon",
    "unescape_backslash",
    "unescape_list_or_string",
]