Source code for icalendar.parser.property

"""Tools for parsing properties."""

import re

from icalendar.parser.string import unescape_string


def unescape_list_or_string(val: str | list[str]) -> str | list[str]:
    """Run :func:`unescape_string` over a single string or a list of strings.

    A list is processed element by element and a new list is returned;
    anything that is not a list is handed to :func:`unescape_string` as is.

    Parameters:
        val: The string, or list of strings, to unescape.

    Returns:
        The unescaped string, or a new list holding the unescaped elements
        in their original order.
    """
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))
# A backslash followed by one of the characters that may be escaped:
# backslash, comma, semicolon, colon, or n/N (newline).
_unescape_backslash_regex = re.compile(r"\\([\\,;:nN])")


def unescape_backslash(val: str):
    r"""Resolve backslash escape sequences in iCalendar text.

    Only genuine backslash escapes (see :rfc:`5545`) are touched, which
    sets this apart from :func:`unescape_string`: URL-encoded sequences
    such as ``%3A`` pass through unchanged.

    The whole string is rewritten in a single regex pass, so an escaped
    backslash followed by ``n`` yields a literal backslash and ``n`` rather
    than a newline.

    Parameters:
        val: The text whose escape sequences should be resolved.

    Returns:
        The text with ``\n`` and ``\N`` replaced by a newline and with
        ``\\``, ``\,``, ``\;`` and ``\:`` replaced by the bare character.
        A backslash followed by any other character is left as is.
    """

    def _resolve(match):
        escaped = match.group(1)
        if escaped in ("n", "N"):
            return "\n"
        return escaped

    return _unescape_backslash_regex.sub(_resolve, val)
def split_on_unescaped_comma(text: str) -> list[str]:
    r"""Break text apart at unescaped commas, then unescape every piece.

    A comma only acts as a separator when it is not escaped by a preceding
    backslash. Each resulting piece is passed through
    :func:`unescape_backslash` before being returned.

    Parameters:
        text: Text that may contain escaped commas,
            for example ``r"foo\, bar,baz"``.

    Returns:
        The unescaped pieces in order of appearance. Empty input yields
        ``[""]``.

    Examples:

        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_comma
            >>> split_on_unescaped_comma(r"foo\, bar,baz")
            ['foo, bar', 'baz']
            >>> split_on_unescaped_comma("a,b,c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_comma(r"a\,b\,c")
            ['a,b,c']
            >>> split_on_unescaped_comma(r"Work,Personal\,Urgent")
            ['Work', 'Personal,Urgent']
    """
    if not text:
        return [""]

    pieces = []
    buffer = []
    chars = iter(text)
    for char in chars:
        if char == "\\":
            # Keep the backslash together with the character it escapes so
            # that an escaped comma is never mistaken for a separator. A
            # backslash at the very end has no follower and is kept alone.
            buffer.append(char)
            follower = next(chars, None)
            if follower is not None:
                buffer.append(follower)
        elif char == ",":
            # Unescaped comma: the current piece is complete.
            pieces.append(unescape_backslash("".join(buffer)))
            buffer = []
        else:
            buffer.append(char)

    # Whatever is left after the last separator forms the final piece.
    pieces.append(unescape_backslash("".join(buffer)))
    return pieces
def split_on_unescaped_semicolon(text: str) -> list[str]:
    r"""Break text apart at unescaped semicolons, then unescape every field.

    A semicolon only acts as a separator when it is not escaped by a
    preceding backslash. Each resulting field is passed through
    :func:`unescape_backslash` before being returned.

    Used by vCard structured properties (ADR, N, ORG) per :rfc:`6350`.

    Parameters:
        text: Text that may contain escaped semicolons,
            for example ``r"field1\;with;field2"``.

    Returns:
        The unescaped fields in order of appearance. Empty input yields
        ``[""]``.

    Examples:

        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_semicolon
            >>> split_on_unescaped_semicolon(r"field1\;with;field2")
            ['field1;with', 'field2']
            >>> split_on_unescaped_semicolon("a;b;c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_semicolon(r"a\;b\;c")
            ['a;b;c']
            >>> split_on_unescaped_semicolon(r"PO Box 123\;Suite 200;City")
            ['PO Box 123;Suite 200', 'City']
    """
    if not text:
        return [""]

    fields = []
    buffer = []
    chars = iter(text)
    for char in chars:
        if char == "\\":
            # Keep the backslash together with the character it escapes so
            # that an escaped semicolon is never mistaken for a separator.
            # A backslash at the very end has no follower and is kept alone.
            buffer.append(char)
            follower = next(chars, None)
            if follower is not None:
                buffer.append(follower)
        elif char == ";":
            # Unescaped semicolon: the current field is complete.
            fields.append(unescape_backslash("".join(buffer)))
            buffer = []
        else:
            buffer.append(char)

    # Whatever is left after the last separator forms the final field.
    fields.append(unescape_backslash("".join(buffer)))
    return fields
# Public names of this module; this list is what ``from ... import *`` exports.
__all__ = [ "split_on_unescaped_comma", "split_on_unescaped_semicolon", "unescape_backslash", "unescape_list_or_string", ]