Source code for corr_vars.utils.logging
import logging
import json
import os
import re
import textwrap
from collections.abc import Mapping, Collection, Iterable, Callable
from typing import Any, Literal, TypeVar
# Generic element type used by the ``serialiser`` callbacks of the collection
# helpers in this module (``log_collection`` and ``pretty_join``).
T = TypeVar("T")

# Names exported by ``from corr_vars.utils.logging import *``.
__all__ = [
    "CustomFormatter",
    "configure_logger_level_and_handlers",
    "text_indent",
    "text_tree_indent",
    "log_collection",
    "log_multiline_string",
    "log_dict",
    "pretty_join",
]
[docs]
class CustomFormatter(logging.Formatter):
"""Logging formatter that optionally colorizes output and highlights numbers.
Features:
- Optionally colorizes level names and certain keywords (e.g. SUCCESS, DROP).
- Optionally underlines numeric tokens in messages for emphasis.
- Respects NO_COLOR environment variable when deciding to colorize.
"""
white = "\x1b[37m"
green = "\x1b[32m"
yellow = "\x1b[33m"
red = "\x1b[31m"
bold_red = "\x1b[31;1m"
reset = "\x1b[0m"
underline = "\x1b[4m"
LEVEL_COLORS = {
"DEBUG": white,
"INFO": white,
"WARNING": yellow,
"ERROR": red,
"CRITICAL": bold_red,
}
def __init__(
self,
colored_output: bool | None = None,
formatted_numbers: bool = True,
fmt: str | None = None,
datefmt: str | None = None,
style: Literal["%", "{", "$"] = "%",
validate: bool = True,
*,
defaults: Mapping[str, Any] | None = None,
) -> None:
"""Create a CustomFormatter.
Args:
colored_output: If True/False forces colored output on/off. If None, color is
enabled unless NO_COLOR env var is set.
formatted_numbers: If True, numeric tokens in messages are underlined.
fmt: Format string passed to logging.Formatter.
datefmt: Date format string passed to logging.Formatter.
style: Format style passed to logging.Formatter.
validate: Validation flag passed to logging.Formatter.
defaults: Defaults mapping passed to logging.Formatter.
"""
# Colored by default (except for NO_COLOR)
# See https://no-color.org/
if colored_output is not None:
self.colored = colored_output
else:
if "NO_COLOR" in os.environ:
self.colored = False
else:
self.colored = True
self.format_numbers = formatted_numbers
super().__init__(fmt, datefmt, style, validate, defaults=defaults)
[docs]
def format(self, record: logging.LogRecord) -> str:
"""Format a LogRecord.
Performs the following extra steps before delegating to the base formatter:
- Optionally underline numeric tokens in record.msg for emphasis.
- Optionally colorize the level name and highlight certain keywords
(e.g. 'SUCCESS' in green, 'DROP' in red).
Args:
record: logging.LogRecord to be formatted.
Returns:
The formatted log message string.
"""
def number_repl(match):
return self.underline + match.group(0) + self.reset
msg = str(record.msg)
if self.format_numbers:
msg = re.sub(
r"\b\d+(?:(?:[\.,:+-]|(?:e[+-]))\d+)*%?",
number_repl,
msg,
)
if self.colored:
levelname = record.levelname
record.levelname = f"{self.LEVEL_COLORS[levelname]}{levelname}{self.reset}"
msg = msg.replace("SUCCESS", f"{self.green}SUCCESS{self.reset}")
msg = msg.replace("DROP", f"{self.red}DROP{self.reset}")
record.msg = msg
return super().format(record)
[docs]
def configure_logger_level_and_handlers(
    logger: logging.Logger,
    level: int | str = logging.INFO,
    file_path: str | None = None,
    file_mode: str = "a",
    verbose_fmt: bool = False,
    colored_output: bool | None = None,
    formatted_numbers: bool = True,
) -> None:
    """Configure logger level and attach standard handlers.

    This helper:
        - Sets the logger level.
        - Removes all existing handlers.
        - Optionally adds a FileHandler (plain ``logging.Formatter``) when
          ``file_path`` is provided.
        - Adds a StreamHandler using CustomFormatter.

    Args:
        logger: Logger instance to configure.
        level: Logging level (int or string); applied to the logger and to
            every handler created here.
        file_path: Optional filesystem path to write logs to.
        file_mode: File mode for the file handler (default 'a').
        verbose_fmt: If True, use a verbose formatter including timestamps and source file.
        colored_output: If True/False forces colored output for the stream handler.
        formatted_numbers: If True, numbers in messages are underlined
            (stream handler only).
    """
    logger.setLevel(level)

    # Iterate over a snapshot: removeHandler() mutates ``logger.handlers``, and
    # removing from a list while iterating it skips every other element, which
    # would leave stale handlers attached (and duplicate the output).
    for handler in list(logger.handlers):
        logger.removeHandler(handler)

    fmt = (
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)"
        if verbose_fmt
        else "%(levelname)s - %(message)s"
    )

    # The file handler is attached before the stream handler and uses the
    # plain base formatter, so log files contain no ANSI escape sequences.
    if file_path:
        _file_handler = logging.FileHandler(filename=file_path, mode=file_mode)
        _file_handler.setLevel(level)
        _file_handler.setFormatter(logging.Formatter(fmt=fmt))
        logger.addHandler(_file_handler)

    _stream_handler = logging.StreamHandler()
    _stream_handler.setLevel(level)
    _stream_handler.setFormatter(
        CustomFormatter(
            fmt=fmt, colored_output=colored_output, formatted_numbers=formatted_numbers
        )
    )
    logger.addHandler(_stream_handler)
[docs]
def text_indent(
text: str | Collection[str],
indent: str | int = 0,
) -> str:
"""Indent a block of text.
Args:
text: The input text block or collection of lines to indent.
indent: String or number of spaces to use for indentation.
Returns:
The indented text block.
"""
prefix = indent if isinstance(indent, str) else " " * indent
multiline = text if isinstance(text, str) else "\n".join(text)
return textwrap.indent(text=multiline, prefix=prefix, predicate=lambda line: True)
[docs]
def text_tree_indent(
text: str | Collection[str],
) -> str:
"""Indent a block of text with tree-style prefixes.
Each line is prefixed with '├── ' except the last line, which is prefixed with '└── '.
Args:
text: The input text block or collection of lines to indent.
Returns:
The tree-indented text block.
"""
lines = text.splitlines() if isinstance(text, str) else text
indented_lines = []
for i, line in enumerate(lines):
prefix = "└── " if i == len(lines) - 1 else "├── "
indented_lines.append(f"{prefix}{line}")
return "\n".join(indented_lines)
[docs]
def log_collection(
    logger: logging.Logger,
    collection: Collection[T],
    level: int = logging.INFO,
    as_tree: bool = True,
    indent: str | int = 0,
    serialiser: Callable[[T], str] = str,
) -> None:
    """Emit the items of a collection as individual log messages.

    With `as_tree=True` the items are rendered via ``text_tree_indent``:
        ├── for all but the last item
        └── for the last item
    With `as_tree=False` the items are rendered via ``text_indent`` and share
    one uniform indentation.

    Args:
        logger: Logger to emit the messages.
        collection: Items to log.
        level: Logging level to use for each emitted message.
        as_tree: Whether to render the collection with tree markers.
        indent: String or number of spaces to use for indentation when `as_tree=False`.
        serialiser: Function to convert each item to a string.
    """
    rendered = list(map(serialiser, collection))

    if as_tree:
        block = text_tree_indent(rendered)
    else:
        block = text_indent(text=rendered, indent=indent)

    # One log call per output line keeps every line prefixed by the handler's format.
    for row in block.splitlines():
        logger.log(level, row)
[docs]
def log_multiline_string(
    logger: logging.Logger,
    multiline: str,
    level: int = logging.INFO,
    indent: str | int = 0,
) -> None:
    """Log each line of a multi-line string as its own message.

    The string is split with ``str.splitlines`` and forwarded to
    ``log_collection`` without tree markers.

    Args:
        logger: Logger to emit the messages.
        multiline: The multi-line string to log.
        level: Logging level to use for each emitted message.
        indent: String or number of spaces put in front of every line.
    """
    rows = multiline.splitlines()
    log_collection(logger, rows, level=level, as_tree=False, indent=indent)
[docs]
def log_dict(
    logger: logging.Logger,
    dictionary: dict[Any, Any],
    level: int = logging.INFO,
    json_indent: str | int = 0,
    indent: str | int = 0,
) -> None:
    """Log a dictionary as JSON, one log message per line.

    Keys are sorted, and values that are not JSON-serialisable are converted
    with ``str``.

    Args:
        logger: Logger to emit the messages.
        dictionary: The dictionary to log.
        level: Logging level to use for each emitted message.
        json_indent: Value passed as ``indent`` to ``json.dumps`` (nesting
            indentation inside the JSON document).
        indent: String or number of spaces put in front of every logged line.
    """
    rendered = json.dumps(
        dictionary,
        sort_keys=True,
        default=str,
        indent=json_indent,
    )
    log_multiline_string(logger, rendered, level=level, indent=indent)
[docs]
def pretty_join(
    input: Iterable[T],
    sep: str = ", ",
    width: int = 120,
    indent: str | int = 0,
    sort: bool = True,
    serialiser: Callable[[T], str] = str,
) -> str:
    """Serialise, join, wrap and indent the items of an iterable.

    Args:
        input: Iterable of items to join.
        sep: Separator string to use between items.
        width: Maximum line width passed to ``textwrap.fill`` (applied before
            the indentation is added).
        indent: String or number of spaces to indent the output.
        sort: Whether to sort the serialised strings before joining.
        serialiser: Function to convert each input item to a string.

    Returns:
        A single string with the joined, wrapped, and indented content.
    """
    parts = [serialiser(item) for item in input]
    if sort:
        parts.sort()

    wrapped = textwrap.fill(sep.join(parts), width=width)
    return text_indent(text=wrapped, indent=indent)