Source code for corr_vars.utils.logging

import logging
import json
import os
import re
import textwrap

from collections.abc import Mapping, Collection, Iterable, Callable
from typing import Any, Literal, TypeVar

T = TypeVar("T")

__all__ = [
    "CustomFormatter",
    "configure_logger_level_and_handlers",
    "text_indent",
    "text_tree_indent",
    "log_collection",
    "log_multiline_string",
    "log_dict",
    "pretty_join",
]



[docs]
class CustomFormatter(logging.Formatter):
    """Logging formatter that optionally colorizes output and highlights numbers.

    Features:
    - Optionally colorizes level names and certain keywords (e.g. SUCCESS, DROP).
    - Optionally underlines numeric tokens in messages for emphasis.
    - Respects NO_COLOR environment variable when deciding to colorize.
    """

    white = "\x1b[37m"
    green = "\x1b[32m"
    yellow = "\x1b[33m"
    red = "\x1b[31m"
    bold_red = "\x1b[31;1m"
    reset = "\x1b[0m"

    underline = "\x1b[4m"

    LEVEL_COLORS = {
        "DEBUG": white,
        "INFO": white,
        "WARNING": yellow,
        "ERROR": red,
        "CRITICAL": bold_red,
    }

    def __init__(
        self,
        colored_output: bool | None = None,
        formatted_numbers: bool = True,
        fmt: str | None = None,
        datefmt: str | None = None,
        style: Literal["%", "{", "$"] = "%",
        validate: bool = True,
        *,
        defaults: Mapping[str, Any] | None = None,
    ) -> None:
        """Create a CustomFormatter.

        Args:
            colored_output: If True/False forces colored output on/off. If None, color is
                enabled unless NO_COLOR env var is set.
            formatted_numbers: If True, numeric tokens in messages are underlined.
            fmt: Format string passed to logging.Formatter.
            datefmt: Date format string passed to logging.Formatter.
            style: Format style passed to logging.Formatter.
            validate: Validation flag passed to logging.Formatter.
            defaults: Defaults mapping passed to logging.Formatter.
        """
        # Colored by default (except for NO_COLOR)
        # See https://no-color.org/
        if colored_output is not None:
            self.colored = colored_output
        else:
            if "NO_COLOR" in os.environ:
                self.colored = False
            else:
                self.colored = True
        self.format_numbers = formatted_numbers
        super().__init__(fmt, datefmt, style, validate, defaults=defaults)


[docs]
    def format(self, record: logging.LogRecord) -> str:
        """Format a LogRecord.

        Performs the following extra steps before delegating to the base formatter:

        - Optionally underline numeric tokens in record.msg for emphasis.
        - Optionally colorize the level name and highlight certain keywords
          (e.g. 'SUCCESS' in green, 'DROP' in red).

        Args:
            record: logging.LogRecord to be formatted.

        Returns:
            The formatted log message string.
        """

        def number_repl(match):
            return self.underline + match.group(0) + self.reset

        msg = str(record.msg)

        if self.format_numbers:
            msg = re.sub(
                r"\b\d+(?:(?:[\.,:+-]|(?:e[+-]))\d+)*%?",
                number_repl,
                msg,
            )

        if self.colored:
            levelname = record.levelname
            record.levelname = f"{self.LEVEL_COLORS[levelname]}{levelname}{self.reset}"

            msg = msg.replace("SUCCESS", f"{self.green}SUCCESS{self.reset}")
            msg = msg.replace("DROP", f"{self.red}DROP{self.reset}")

        record.msg = msg
        return super().format(record)





[docs]
def configure_logger_level_and_handlers(
    logger: logging.Logger,
    level: int | str = logging.INFO,
    file_path: str | None = None,
    file_mode: str = "a",
    verbose_fmt: bool = False,
    colored_output: bool | None = None,
    formatted_numbers: bool = True,
) -> None:
    """Configure logger level and attach standard handlers.

    This helper:
    - Sets the logger level.
    - Removes existing handlers.
    - Adds a StreamHandler using CustomFormatter.
    - Optionally adds a FileHandler when file_path is provided.

    Args:
        logger: Logger instance to configure.
        level: Logging level (int or string).
        file_path: Optional filesystem path to write logs to.
        file_mode: File mode for the file handler (default 'a').
        verbose_fmt: If True, use a verbose formatter including timestamps and source file.
        colored_output: If True/False forces colored output for the stream handler.
        formatted_numbers: If True, numbers in messages are underlined.
    """
    logger.setLevel(level)

    for handler in logger.handlers:
        logger.removeHandler(handler)

    fmt = (
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)"
        if verbose_fmt
        else "%(levelname)s - %(message)s"
    )

    if file_path:
        _file_handler = logging.FileHandler(filename=file_path, mode=file_mode)
        _file_handler.setLevel(level)
        _file_handler.setFormatter(logging.Formatter(fmt=fmt))

        logger.addHandler(_file_handler)

    _stream_handler = logging.StreamHandler()
    _stream_handler.setLevel(level)
    _stream_handler.setFormatter(
        CustomFormatter(
            fmt=fmt, colored_output=colored_output, formatted_numbers=formatted_numbers
        )
    )

    logger.addHandler(_stream_handler)




[docs]
def text_indent(
    text: str | Collection[str],
    indent: str | int = 0,
) -> str:
    """Indent a block of text.

    Args:
        text: The input text block or collection of lines to indent.
        indent: String or number of spaces to use for indentation.

    Returns:
        The indented text block.
    """
    prefix = indent if isinstance(indent, str) else " " * indent
    multiline = text if isinstance(text, str) else "\n".join(text)
    return textwrap.indent(text=multiline, prefix=prefix, predicate=lambda line: True)




[docs]
def text_tree_indent(
    text: str | Collection[str],
) -> str:
    """Indent a block of text with tree-style prefixes.

    Each line is prefixed with '├── ' except the last line, which is prefixed with '└── '.

    Args:
        text: The input text block or collection of lines to indent.

    Returns:
        The tree-indented text block.
    """
    lines = text.splitlines() if isinstance(text, str) else text
    indented_lines = []
    for i, line in enumerate(lines):
        prefix = "└── " if i == len(lines) - 1 else "├── "
        indented_lines.append(f"{prefix}{line}")
    return "\n".join(indented_lines)




[docs]
def log_collection(
    logger: logging.Logger,
    collection: Collection[T],
    level: int = logging.INFO,
    as_tree: bool = True,
    indent: str | int = 0,
    serialiser: Callable[[T], str] = str,
) -> None:
    """Emit one log message per line of a rendered collection.

    Each item is converted with `serialiser`, the results are laid out either
    as a tree or as a uniformly indented block, and every resulting line is
    logged separately.

    With `as_tree=True` the items carry tree markers:
      ├── for all but the last item
      └── for the last item

    With `as_tree=False` the items are indented by `indent` without markers.

    Args:
        logger: Logger that receives the messages.
        collection: Items to log.
        level: Logging level used for every emitted line.
        as_tree: Whether to render with tree markers.
        indent: Prefix string or number of spaces; only used when `as_tree=False`.
        serialiser: Function converting each item to its string form.
    """
    rendered = list(map(serialiser, collection))

    if as_tree:
        block = text_tree_indent(rendered)
    else:
        block = text_indent(text=rendered, indent=indent)

    for entry in block.splitlines():
        logger.log(level, entry)




[docs]
def log_multiline_string(
    logger: logging.Logger,
    multiline: str,
    level: int = logging.INFO,
    indent: str | int = 0,
) -> None:
    """Log each line of a multi-line string as its own message.

    The string is split on line boundaries and the lines are handed to
    `log_collection` without tree markers.

    Args:
        logger: Logger that receives the messages.
        multiline: The multi-line string to log.
        level: Logging level used for every emitted line.
        indent: Prefix string or number of spaces placed before each line.
    """
    lines = multiline.splitlines()
    log_collection(logger, lines, level=level, as_tree=False, indent=indent)




[docs]
def log_dict(
    logger: logging.Logger,
    dictionary: dict[Any, Any],
    level: int = logging.INFO,
    json_indent: str | int = 0,
    indent: str | int = 0,
) -> None:
    """Log a dictionary as JSON, one log message per line.

    The dictionary is dumped with sorted keys; values the JSON encoder cannot
    handle are converted with `str`. The resulting text is logged line by line.

    Args:
        logger: Logger that receives the messages.
        dictionary: The dictionary to log.
        level: Logging level used for every emitted line.
        json_indent: Indentation passed to `json.dumps` for nested levels.
        indent: Prefix string or number of spaces placed before each logged line.
    """
    rendered = json.dumps(
        dictionary,
        sort_keys=True,
        default=str,
        indent=json_indent,
    )
    log_multiline_string(logger, rendered, level=level, indent=indent)




[docs]
def pretty_join(
    input: Iterable[T],
    sep: str = ", ",
    width: int = 120,
    indent: str | int = 0,
    sort: bool = True,
    serialiser: Callable[[T], str] = str,
) -> str:
    """Join items into a wrapped, indented text block.

    Items are serialised, optionally sorted by their string form, joined with
    `sep`, wrapped to `width`, and finally indented. Wrapping happens before
    the indentation is added.

    Args:
        input: Items to join.
        sep: Separator placed between items.
        width: Line width passed to the wrapping step.
        indent: Prefix string or number of spaces placed before each output line.
        sort: Whether to sort the serialised items before joining.
        serialiser: Function converting each item to its string form.

    Returns:
        The joined, wrapped and indented text.
    """
    items = [serialiser(item) for item in input]
    if sort:
        items.sort()
    wrapped = textwrap.fill(sep.join(items), width=width)
    return text_indent(text=wrapped, indent=indent)