chrisbase
+2
-2
@@ -1,4 +0,4 @@ | ||
| Metadata-Version: 2.1 | ||
| Metadata-Version: 2.2 | ||
| Name: chrisbase | ||
| Version: 0.5.7 | ||
| Version: 0.5.8 | ||
| Summary: Base library for python coding | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/chrisjihee/chrisbase |
+1
-1
| [metadata] | ||
| name = chrisbase | ||
| version = 0.5.7 | ||
| version = 0.5.8 | ||
| author = Jihee Ryu | ||
@@ -5,0 +5,0 @@ author_email = chrisjihee@naver.com |
@@ -1,4 +0,4 @@ | ||
| Metadata-Version: 2.1 | ||
| Metadata-Version: 2.2 | ||
| Name: chrisbase | ||
| Version: 0.5.7 | ||
| Version: 0.5.8 | ||
| Summary: Base library for python coding | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/chrisjihee/chrisbase |
+191
-44
@@ -0,1 +1,2 @@ | ||
| import itertools | ||
| import json | ||
@@ -6,3 +7,4 @@ import logging | ||
| import warnings | ||
| from dataclasses import dataclass, field | ||
| from dataclasses import dataclass | ||
| from dataclasses import field | ||
| from datetime import datetime | ||
@@ -13,3 +15,4 @@ from datetime import timedelta | ||
| from pathlib import Path | ||
| from typing import List, Optional, Mapping, Any, Iterable, Tuple, ClassVar | ||
| from typing import Any, Callable, Optional, ClassVar | ||
| from typing import Iterable, List, Tuple, Mapping | ||
@@ -24,6 +27,7 @@ import pandas as pd | ||
| from more_itertools import ichunked | ||
| from pydantic import BaseModel | ||
| from pydantic import BaseModel, Field, model_validator, ConfigDict | ||
| from pymongo import MongoClient | ||
| from typing_extensions import Self | ||
| from chrisbase.io import get_hostname, get_hostaddr, current_file, first_or, cwd, hr, flush_or, make_parent_dir, setup_unit_logger, setup_dual_logger, open_file, file_lines, to_table_lines, new_path, get_http_clients | ||
| from chrisbase.io import get_hostname, get_hostaddr, current_file, first_or, cwd, hr, flush_or, make_parent_dir, setup_unit_logger, setup_dual_logger, open_file, file_lines, new_path, get_http_clients, log_table, LoggingFormat | ||
| from chrisbase.time import now, str_delta | ||
@@ -36,9 +40,141 @@ from chrisbase.util import tupled, SP, NO, to_dataframe | ||
| class AppTyper(typer.Typer): | ||
| def __init__(self): | ||
| def __init__(self, *args, **kwargs): | ||
| super().__init__( | ||
| *args, | ||
| add_completion=False, | ||
| pretty_exceptions_enable=False, | ||
| ) | ||
| **kwargs) | ||
| @staticmethod | ||
| def run(function: Callable[..., Any], **kwargs) -> None: | ||
| app = AppTyper(**kwargs) | ||
| app.command()(function) | ||
| app() | ||
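For reference, the new static run helper turns a plain function into a one-command CLI. A minimal usage sketch, assuming the class is importable from chrisbase.data (the module path is not shown in this diff):

    from chrisbase.data import AppTyper  # assumed module path

    def greet(name: str = "world"):
        print(f"Hello, {name}!")

    if __name__ == "__main__":
        AppTyper.run(greet)  # builds an AppTyper, registers greet as its command, runs it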
| class NewProjectEnv(BaseModel): | ||
| hostname: str = get_hostname() | ||
| hostaddr: str = get_hostaddr() | ||
| global_rank: int = Field(default=-1) | ||
| local_rank: int = Field(default=-1) | ||
| node_rank: int = Field(default=-1) | ||
| world_size: int = Field(default=-1) | ||
| time_stamp: str = Field(default=now('%m%d-%H%M%S')) | ||
| python_path: Path = Path(sys.executable).absolute() | ||
| current_dir: Path = Path().absolute() | ||
| current_file: Path = Path(sys.argv[0]) | ||
| command_args: list[str] = sys.argv[1:] | ||
| output_home: str | Path = Field(default="output") | ||
| output_name: str | Path = Field(default=None) | ||
| run_version: str | int | Path | None = Field(default=None) | ||
| output_file: str | Path = Field(default=None) | ||
| logging_file: str | Path = Field(default=None) | ||
| logging_level: int = Field(default=logging.INFO) | ||
| logging_format: str = Field(default=LoggingFormat.BRIEF_00) | ||
| datetime_format: str = Field(default="[%m.%d %H:%M:%S]") | ||
| argument_file: str | Path = Field(default=None) | ||
| random_seed: int = Field(default=None) | ||
| max_workers: int = Field(default=1) | ||
| debugging: bool = Field(default=False) | ||
| output_dir: Path | None = Field(default=None, init=False) | ||
| @model_validator(mode='after') | ||
| def after(self) -> Self: | ||
| if self.output_home: | ||
| self.output_home = Path(self.output_home) | ||
| self.output_dir = self.output_home | ||
| if self.output_name: | ||
| self.output_dir = self.output_dir / self.output_name | ||
| if self.run_version: | ||
| self.output_dir = self.output_dir / str(self.run_version) | ||
| self.setup_logger(self.logging_level) | ||
| return self | ||
| def setup_logger(self, logging_level: int = logging.INFO): | ||
| if self.output_dir and self.logging_file: | ||
| setup_dual_logger( | ||
| level=logging_level, fmt=self.logging_format, datefmt=self.datetime_format, stream=sys.stdout, | ||
| filename=self.output_dir / self.logging_file, | ||
| ) | ||
| else: | ||
| setup_unit_logger( | ||
| level=logging_level, fmt=self.logging_format, datefmt=self.datetime_format, stream=sys.stdout, | ||
| ) | ||
| return self | ||
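Constructing NewProjectEnv resolves output_dir from output_home / output_name / run_version and sets up logging as a side effect of the after-validator. A minimal sketch, with the chrisbase.data module path assumed:

    from chrisbase.data import NewProjectEnv  # assumed module path

    env = NewProjectEnv(
        output_home="output",
        output_name="demo",
        run_version="v1",
        logging_file="run.log",  # with output_dir set, a dual stdout+file logger is installed
    )
    print(env.output_dir)  # output/demo/v1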
| class TimeChecker(BaseModel): | ||
| t1: datetime = datetime.now() | ||
| t2: datetime = datetime.now() | ||
| started: str | None = Field(default=None) | ||
| settled: str | None = Field(default=None) | ||
| elapsed: str | None = Field(default=None) | ||
| def set_started(self): | ||
| self.started = now() | ||
| self.settled = None | ||
| self.elapsed = None | ||
| self.t1 = datetime.now() | ||
| return self | ||
| def set_settled(self): | ||
| self.t2 = datetime.now() | ||
| self.settled = now() | ||
| self.elapsed = str_delta(self.t2 - self.t1) | ||
| return self | ||
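A quick sketch of the pydantic TimeChecker (module path assumed); elapsed is rendered by str_delta from chrisbase.time:

    import time

    from chrisbase.data import TimeChecker  # assumed module path

    checker = TimeChecker().set_started()
    time.sleep(0.5)
    checker.set_settled()
    print(checker.elapsed)  # wall time between the two calls, as formatted by str_delta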
| class NewCommonArguments(BaseModel): | ||
| env: NewProjectEnv = Field(default=None) | ||
| time: TimeChecker = Field(default_factory=TimeChecker) | ||
| def dataframe(self, columns=None) -> pd.DataFrame: | ||
| if not columns: | ||
| columns = [self.__class__.__name__, "value"] | ||
| df = pd.concat([ | ||
| to_dataframe(columns=columns, raw=self.env, data_prefix="env"), | ||
| to_dataframe(columns=columns, raw=self.time, data_prefix="time"), | ||
| ]).reset_index(drop=True) | ||
| return df | ||
| def info_args(self, c="-", w=137): | ||
| log_table(logger, self.dataframe(), c=c, w=w, level=logging.INFO, tablefmt="tsv", bordered=True) | ||
| return self | ||
| def save_args(self, to: Path | str = None) -> Path | None: | ||
| if self.env.output_dir and self.env.argument_file: | ||
| args_file = to if to else self.env.output_dir / self.env.argument_file | ||
| args_json = self.model_dump_json(indent=2) | ||
| make_parent_dir(args_file).write_text(args_json, encoding="utf-8") | ||
| return args_file | ||
| else: | ||
| return None | ||
| class NewIOArguments(NewCommonArguments): | ||
| model_config = ConfigDict(arbitrary_types_allowed=True) | ||
| input: "InputOption" = Field(default=None) | ||
| output: "OutputOption" = Field(default=None) | ||
| option: BaseModel | None = None | ||
| def __post_init__(self): | ||
| super().__post_init__() | ||
| def dataframe(self, columns=None) -> pd.DataFrame: | ||
| if not columns: | ||
| columns = [self.__class__.__name__, "value"] | ||
| return pd.concat([ | ||
| super().dataframe(columns=columns), | ||
| to_dataframe(columns=columns, raw=self.input, data_prefix="input", data_exclude=["file", "table", "index"]), | ||
| to_dataframe(columns=columns, raw=self.input.file, data_prefix="input.file") if self.input.file else None, | ||
| to_dataframe(columns=columns, raw=self.input.table, data_prefix="input.table") if self.input.table else None, | ||
| to_dataframe(columns=columns, raw=self.input.index, data_prefix="input.index") if self.input.index else None, | ||
| to_dataframe(columns=columns, raw=self.output, data_prefix="output", data_exclude=["file", "table", "index"]), | ||
| to_dataframe(columns=columns, raw=self.output.file, data_prefix="output.file") if self.output.file else None, | ||
| to_dataframe(columns=columns, raw=self.output.table, data_prefix="output.table") if self.output.table else None, | ||
| to_dataframe(columns=columns, raw=self.output.index, data_prefix="output.index") if self.output.index else None, | ||
| to_dataframe(columns=columns, raw=self.option, data_prefix="option") if self.option else None, | ||
| ]).reset_index(drop=True) | ||
| @dataclass | ||
@@ -227,3 +363,3 @@ class TypedData(DataClassJsonMixin): | ||
| self.opt: FileOption = opt | ||
| self.path: Path | None = None | ||
| self.path: Path = self.opt.home / self.opt.name | ||
| self.fp: IOBase | None = None | ||
@@ -506,3 +642,3 @@ | ||
| hostaddr: str = field(init=False) | ||
| time_stamp: str = now('%m%d.%H%M%S') | ||
| time_stamp: str = now('%m%d-%H%M%S') | ||
| python_path: Path = field(init=False) | ||
@@ -540,5 +676,4 @@ current_dir: Path = field(init=False) | ||
| def info_env(self): | ||
| for line in to_table_lines(to_dataframe(self)): | ||
| logger.info(line) | ||
| def info_env(self, c="-", w=137): | ||
| log_table(logger, to_dataframe(self), c=c, w=w, level=logging.INFO, tablefmt="tsv", bordered=True) | ||
| return self | ||
@@ -573,24 +708,2 @@ | ||
| @dataclass | ||
| class TimeChecker(ResultData): | ||
| t1: datetime = datetime.now() | ||
| t2: datetime = datetime.now() | ||
| started: str | None = field(default=None) | ||
| settled: str | None = field(default=None) | ||
| elapsed: str | None = field(default=None) | ||
| def set_started(self): | ||
| self.started = now() | ||
| self.settled = None | ||
| self.elapsed = None | ||
| self.t1 = datetime.now() | ||
| return self | ||
| def set_settled(self): | ||
| self.t2 = datetime.now() | ||
| self.settled = now() | ||
| self.elapsed = str_delta(self.t2 - self.t1) | ||
| return self | ||
| @dataclass | ||
| class CommonArguments(ArgumentGroupData): | ||
@@ -613,5 +726,4 @@ tag = None | ||
| def info_args(self): | ||
| for line in to_table_lines(self.dataframe()): | ||
| logger.info(line) | ||
| def info_args(self, c="-", w=137): | ||
| log_table(logger, self.dataframe(), c=c, w=w, level=logging.INFO, tablefmt="tsv", bordered=True) | ||
| return self | ||
@@ -656,2 +768,23 @@ | ||
| @dataclass | ||
| class Counter: | ||
| step: int = 1 | ||
| _incs = itertools.count() | ||
| _base = itertools.count() | ||
| def __str__(self): | ||
| return f"Counter(val={self.val()}, step={self.step})" | ||
| def __repr__(self): | ||
| return f"Counter(val={self.val()}, step={self.step})" | ||
| def inc(self) -> int: | ||
| for _ in range(self.step): | ||
| next(self._incs) | ||
| return self.val() | ||
| def val(self) -> int: | ||
| return next(self._incs) - next(self._base) | ||
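How the Counter trick works: val() draws one tick from both _incs and _base, so their difference (the logical value) is unchanged by reads, while inc() advances _incs alone. Since next() on itertools.count is atomic under CPython, this gives a lock-free counter; note, though, that _incs and _base are class attributes, so all Counter instances share one tally. A sketch, assuming the class lives in chrisbase.data:

    from chrisbase.data import Counter  # assumed module path

    c = Counter(step=2)
    print(c.inc())  # 2: two ticks pulled from _incs, then the difference is reported
    print(c.val())  # 2: reads consume one tick from BOTH counters, keeping the value stable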
| class RuntimeChecking: | ||
@@ -671,4 +804,4 @@ def __init__(self, args: CommonArguments): | ||
| class JobTimer: | ||
| def __init__(self, name=None, args: CommonArguments = None, prefix=None, postfix=None, | ||
| verbose=True, mt=0, mb=0, pt=0, pb=0, rt=0, rb=0, rc='-', rw=137, | ||
| def __init__(self, name=None, args: CommonArguments | NewCommonArguments = None, prefix=None, postfix=None, | ||
| verbose=1, mt=0, mb=0, pt=0, pb=0, rt=0, rb=0, rc='-', rw=137, | ||
| flush_sec=0.1, mute_loggers=None, mute_warning=None): | ||
@@ -688,3 +821,3 @@ self.name = name | ||
| self.rw: int = rw | ||
| self.verbose: bool = verbose | ||
| self.verbose: int = verbose | ||
| assert isinstance(mute_loggers, (type(None), str, list, tuple, set)) | ||
@@ -709,3 +842,3 @@ assert isinstance(mute_warning, (type(None), str, list, tuple, set)) | ||
| flush_or(sys.stdout, sys.stderr, sec=self.flush_sec if self.flush_sec else None) | ||
| if self.verbose: | ||
| if self.verbose > 0: | ||
| if self.mt > 0: | ||
@@ -728,5 +861,6 @@ for _ in range(self.mt): | ||
| self.args.time.set_started() | ||
| if self.verbose: | ||
| if self.verbose >= 1: | ||
| self.args.info_args(c='-', w=self.rw) | ||
| if self.verbose >= 2: | ||
| self.args.save_args() | ||
| self.args.info_args() | ||
| self.t1 = datetime.now() | ||
@@ -743,7 +877,8 @@ return self | ||
| self.args.time.set_settled() | ||
| self.args.save_args() | ||
| if self.verbose >= 2: | ||
| self.args.save_args() | ||
| self.t2 = datetime.now() | ||
| self.td = self.t2 - self.t1 | ||
| flush_or(sys.stdout, sys.stderr, sec=self.flush_sec if self.flush_sec else None) | ||
| if self.verbose: | ||
| if self.verbose > 0: | ||
| if self.pb > 0: | ||
@@ -774,1 +909,13 @@ for _ in range(self.pb): | ||
| exit(22) | ||
| def find_sublist_range(haystack: List[Any], sublist: List[Any], case_sensitive: bool = True) -> List[int]: | ||
| if not case_sensitive: | ||
| haystack = [x.lower() if isinstance(x, str) else x for x in haystack] | ||
| sublist = [x.lower() if isinstance(x, str) else x for x in sublist] | ||
| sub_len = len(sublist) | ||
| for i in range(len(haystack) - sub_len + 1): | ||
| if haystack[i:i + sub_len] == sublist: | ||
| return list(range(i, i + sub_len)) | ||
| return list() |
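find_sublist_range returns the index range of the first match, or an empty list when there is none. A usage sketch (module path assumed):

    from chrisbase.data import find_sublist_range  # assumed module path

    tokens = ["The", "quick", "brown", "fox"]
    print(find_sublist_range(tokens, ["QUICK", "brown"], case_sensitive=False))  # [1, 2]
    print(find_sublist_range(tokens, ["lazy"]))                                  # []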
+210
-36
| import bz2 | ||
| import csv | ||
| import gzip | ||
@@ -6,2 +7,3 @@ import json | ||
| import os | ||
| import re | ||
| import shutil | ||
@@ -18,3 +20,3 @@ import socket | ||
| from time import sleep | ||
| from typing import Iterable | ||
| from typing import Iterable, List | ||
@@ -25,2 +27,3 @@ import httpx | ||
| from tabulate import tabulate | ||
| from tensorboard.backend.event_processing import event_accumulator | ||
@@ -44,24 +47,61 @@ from chrisbase.time import from_timestamp | ||
| BRIEF_20: str = ' ┇ '.join(['%(asctime)s', '%(name)20s', '%(message)s']) | ||
| CHECK_00: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(message)s']) | ||
| CHECK_12: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)12s', '%(message)s']) | ||
| CHECK_16: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)16s', '%(message)s']) | ||
| CHECK_20: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)20s', '%(message)s']) | ||
| CHECK_24: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)24s', '%(message)s']) | ||
| CHECK_28: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)28s', '%(message)s']) | ||
| CHECK_32: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)32s', '%(message)s']) | ||
| CHECK_36: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)36s', '%(message)s']) | ||
| CHECK_40: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)40s', '%(message)s']) | ||
| CHECK_48: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)48s', '%(message)s']) | ||
| DEBUG_00: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(message)s']) | ||
| DEBUG_12: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)12s', '%(message)s']) | ||
| DEBUG_16: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)16s', '%(message)s']) | ||
| DEBUG_20: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)20s', '%(message)s']) | ||
| DEBUG_24: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)24s', '%(message)s']) | ||
| DEBUG_28: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)28s', '%(message)s']) | ||
| DEBUG_32: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)32s', '%(message)s']) | ||
| DEBUG_36: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)36s', '%(message)s']) | ||
| DEBUG_40: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)40s', '%(message)s']) | ||
| DEBUG_48: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)48s', '%(message)s']) | ||
| CHECK_00: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(message)s']) | ||
| CHECK_12: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s']) | ||
| CHECK_16: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s']) | ||
| CHECK_20: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s']) | ||
| CHECK_24: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s']) | ||
| CHECK_28: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s']) | ||
| CHECK_32: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s']) | ||
| CHECK_36: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s']) | ||
| CHECK_40: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s']) | ||
| CHECK_48: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s']) | ||
| TRACE_12: str = ' ┇ '.join(['%(asctime)s', '%(filename)12s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_16: str = ' ┇ '.join(['%(asctime)s', '%(filename)16s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_20: str = ' ┇ '.join(['%(asctime)s', '%(filename)20s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_24: str = ' ┇ '.join(['%(asctime)s', '%(filename)24s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_28: str = ' ┇ '.join(['%(asctime)s', '%(filename)28s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_32: str = ' ┇ '.join(['%(asctime)s', '%(filename)32s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_36: str = ' ┇ '.join(['%(asctime)s', '%(filename)36s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_40: str = ' ┇ '.join(['%(asctime)s', '%(filename)40s:%(lineno)-4d', '%(message)s']) | ||
| DEBUG_00: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(message)s']) | ||
| DEBUG_12: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s']) | ||
| DEBUG_16: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s']) | ||
| DEBUG_20: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s']) | ||
| DEBUG_24: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s']) | ||
| DEBUG_28: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s']) | ||
| DEBUG_32: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s']) | ||
| DEBUG_36: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s']) | ||
| DEBUG_40: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s']) | ||
| DEBUG_48: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s']) | ||
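These CHECK_* / TRACE_* formats plug into setup_unit_logger, whose keyword names appear in the NewProjectEnv.setup_logger call earlier in this diff. A hedged example:

    import logging
    import sys

    from chrisbase.io import LoggingFormat, setup_unit_logger

    setup_unit_logger(level=logging.INFO, fmt=LoggingFormat.CHECK_20,
                      datefmt="[%m.%d %H:%M:%S]", stream=sys.stdout)
    logging.getLogger("demo").info("hello")
    # e.g. [05.31 14:02:07] ┇ INFO    ┇                 demo ┇ hello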
| class LoggerWriter: | ||
| def __init__(self, logger: logging.Logger, level: int = logging.INFO): | ||
| """ | ||
| A simple wrapper to use a logger like a file-like stream. | ||
| :param logger: The logger instance to which messages will be sent. | ||
| :param level: Logging level to use (default: logging.INFO). | ||
| """ | ||
| self.logger = logger | ||
| self.level = level | ||
| def write(self, msg: str): | ||
| """ | ||
| Emulates the behavior of a stream's write method. | ||
| Non-empty lines are forwarded to the logger at the given level. | ||
| """ | ||
| # Strip out extra whitespace/newlines and only log non-empty lines | ||
| msg = msg.rstrip() | ||
| if msg: | ||
| self.logger.log(self.level, msg) | ||
| def flush(self): | ||
| """ | ||
| Emulates the behavior of a stream's flush method. | ||
| In this context, we generally do not need to do anything special for flush. | ||
| """ | ||
| pass | ||
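LoggerWriter implements just enough of the stream protocol (write/flush) to stand in for sys.stdout, e.g. to capture print() output from third-party code. A minimal sketch:

    import contextlib
    import logging

    from chrisbase.io import LoggerWriter

    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    with contextlib.redirect_stdout(LoggerWriter(logging.getLogger("captured"))):
        print("routed through the logger")  # emitted as: INFO routed through the logger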
| class MuteStd: | ||
@@ -162,4 +202,4 @@ def __init__(self, out=None, err=None, flush_sec=0.0, mute_warning=None, mute_logger=None): | ||
| def hr(c="=", w=137, t=0, b=0, title=''): | ||
| # w=137-26 : %(asctime)s %(levelname)-8s %(message)s | ||
| # w=137-47 : %(asctime)s %(levelname)-8s %(filename)15s:%(lineno)-4d %(message)s | ||
| # w=137-26 : %(asctime)s %(levelname)-7s %(message)s | ||
| # w=137-47 : %(asctime)s %(levelname)-7s %(filename)15s:%(lineno)-4d %(message)s | ||
| # w=137 (for ipynb on Chrome using D2Coding 13pt) with scroll | ||
@@ -202,3 +242,3 @@ # w=139 (for ipynb on Chrome using D2Coding 13pt) without scroll | ||
| def str_table(tabular_data, headers=(), tablefmt="pipe", showindex="default", transposed_df=False, **kwargs): | ||
| def str_table(tabular_data, headers=(), tablefmt="plain", showindex="default", transposed_df=False, **kwargs): | ||
| if not headers and isinstance(tabular_data, pd.DataFrame): | ||
@@ -208,6 +248,11 @@ if showindex is True or showindex == "default" or showindex == "always" or \ | ||
| if transposed_df: | ||
| index_header = '#' | ||
| if tabular_data.index.name: | ||
| index_header = tabular_data.index.name | ||
| elif tabular_data.index.names and all(n is not None for n in tabular_data.index.names): | ||
| index_header = ' '.join(tabular_data.index.names) | ||
| if isinstance(tabular_data.columns, pd.RangeIndex): | ||
| headers = ['key'] + list(map(str(range(1, len(tabular_data.columns) + 1)))) | ||
| headers = [index_header] + list(map(str, range(1, len(tabular_data.columns) + 1))) | ||
| else: | ||
| headers = ['key'] + list(tabular_data.columns) | ||
| headers = [index_header] + list(tabular_data.columns) | ||
| else: | ||
@@ -221,11 +266,19 @@ headers = ['#'] + list(tabular_data.columns) | ||
| def to_table_lines(*args, tablefmt="presto", border_idx=1, **kwargs): | ||
| def to_table_lines(*args, c="-", w=137, left='', tablefmt="plain", header_idx=0, bordered=False, **kwargs): | ||
| table = str_table(*args, **kwargs, tablefmt=tablefmt) | ||
| lines = table.splitlines() | ||
| border = lines[border_idx] | ||
| lines = [border] + lines + [border] | ||
| for line in lines: | ||
| if bordered: | ||
| border = hr(c=c, w=w) | ||
| lines = ([border] + lines[:header_idx + 1] + | ||
| [border] + lines[header_idx + 1:] + | ||
| [border]) | ||
| if left: | ||
| lines = [left + line for line in lines] | ||
| for line in lines: | ||
| yield line | ||
| def log_table(my_logger, *args, c="-", w=137, level=logging.INFO, **kwargs): | ||
| for line in to_table_lines(*args, **kwargs, c=c, w=w): | ||
| my_logger.log(level, line) | ||
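log_table replaces the old to_table_lines loops at call sites (compare info_env and info_args above). A minimal sketch; tablefmt and bordered are forwarded to to_table_lines:

    import logging
    import pandas as pd

    from chrisbase.io import log_table

    logging.basicConfig(level=logging.INFO, format="%(message)s")
    df = pd.DataFrame({"name": ["alpha", "beta"], "value": [1, 2]})
    # renders df with tabulate, adds 40-char rules around the header, logs each line
    log_table(logging.getLogger(__name__), df, c="-", w=40, tablefmt="tsv", bordered=True)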
| def file_table(*args, file=sys_stdout, **kwargs): | ||
@@ -296,3 +349,3 @@ print(str_table(*args, **kwargs), file=file) | ||
| def paths(path, accept_fn=lambda _: True): | ||
| def paths(path, accept_fn=lambda _: True) -> List[Path]: | ||
| assert path, f"No path: {path}" | ||
@@ -307,11 +360,15 @@ path = Path(path) | ||
| def dirs(path): | ||
| def dirs(path) -> List[Path]: | ||
| return paths(path, accept_fn=lambda x: x.is_dir()) | ||
| def files(path): | ||
| def files(path) -> List[Path]: | ||
| return paths(path, accept_fn=lambda x: x.is_file()) | ||
| def glob_dirs(path, glob: str): | ||
| def non_empty_files(path) -> List[Path]: | ||
| return paths(path, accept_fn=lambda x: x.is_file() and x.stat().st_size > 0) | ||
| def glob_dirs(path, glob: str) -> List[Path]: | ||
| path = Path(path) | ||
@@ -321,3 +378,3 @@ return sorted([x for x in path.glob(glob) if x.is_dir()]) | ||
| def glob_files(path, glob: str): | ||
| def glob_files(path, glob: str) -> List[Path]: | ||
| path = Path(path) | ||
@@ -384,2 +441,6 @@ return sorted([x for x in path.glob(glob) if x.is_file()]) | ||
| def all_line_list(path, mini=None): | ||
| return list(all_lines(path, mini)) | ||
| def tsv_lines(*args, **kwargs): | ||
@@ -393,5 +454,21 @@ return map(lambda x: x.split('\t'), all_lines(*args, **kwargs)) | ||
| def text_blocks(path) -> Iterable[List[str]]: | ||
| block = [] | ||
| with open(path, mode="r", encoding="utf-8") as f: | ||
| for line in f: | ||
| line = line.rstrip("\n") | ||
| if not line: | ||
| if block: | ||
| yield block | ||
| block = [] | ||
| else: | ||
| block.append(line) | ||
| if block: | ||
| yield block | ||
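text_blocks streams blank-line-separated blocks, e.g. for CoNLL-style corpora. A sketch:

    from pathlib import Path

    from chrisbase.io import text_blocks

    Path("sample.txt").write_text("He saw her\nShe left\n\nThey stayed\n", encoding="utf-8")
    print(list(text_blocks("sample.txt")))  # [['He saw her', 'She left'], ['They stayed']]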
| def new_path(path, post=None, pre=None, sep='-') -> Path: | ||
| path = Path(path) | ||
| new_name = (f"{pre}{sep}" if pre else "") + path.stem + (f"{sep}{post}" if post else "") | ||
| new_name = (f"{pre}{sep}" if pre is not None else "") + path.stem + (f"{sep}{post}" if post is not None else "") | ||
| return path.parent / (new_name + NO.join(path.suffixes)) | ||
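Switching the affix guards from truthiness to "is not None" means falsy but meaningful values such as 0 are no longer dropped. For instance:

    from chrisbase.io import new_path

    print(new_path("out/data.json", post="part1"))    # out/data-part1.json
    print(new_path("out/data.json", pre=0, sep="_"))  # out/0_data.json (0 survives the new check)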
@@ -471,2 +548,18 @@ | ||
| # Normalize simple (non-nested) lists in a JSON string so each renders on one line | ||
| def normalize_simple_list_in_json(json_input): | ||
| json_output = [] | ||
| pattern = re.compile(r"\[[^\[\]]+?]") | ||
| if re.search(pattern, json_input): | ||
| pre_end = 0 | ||
| for m in re.finditer(pattern, json_input): | ||
| json_output.append(m.string[pre_end: m.start()]) | ||
| json_output.append("[" + " ".join(m.group().split()).removeprefix("[ ").removesuffix(" ]") + "]") | ||
| pre_end = m.end() | ||
| json_output.append(m.string[pre_end:]) | ||
| return ''.join(json_output) | ||
| else: | ||
| return json_input | ||
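The pattern matches only bracket pairs without nested brackets, so nested arrays are left untouched while flat lists are collapsed onto a single line. For example:

    from chrisbase.io import normalize_simple_list_in_json

    raw = '{\n  "ids": [\n    1,\n    2,\n    3\n  ]\n}'
    print(normalize_simple_list_in_json(raw))
    # {
    #   "ids": [1, 2, 3]
    # }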
| def open_file(path: str | Path, mode: str = "rb", **kwargs) -> IOBase: | ||
@@ -736,1 +829,82 @@ file = Path(path) | ||
| logger.debug(f"logging.getLogger({x.name:<20s}) = Logger(level={x.level}, handlers={x.handlers}, disabled={x.disabled}, propagate={x.propagate}, parent={x.parent})") | ||
| def set_verbosity_debug(*names): | ||
| for name in names: | ||
| logging.getLogger(name).setLevel(logging.DEBUG) | ||
| def set_verbosity_info(*names): | ||
| for name in names: | ||
| logging.getLogger(name).setLevel(logging.INFO) | ||
| def set_verbosity_warning(*names): | ||
| for name in names: | ||
| logging.getLogger(name).setLevel(logging.WARNING) | ||
| def set_verbosity_error(*names): | ||
| for name in names: | ||
| logging.getLogger(name).setLevel(logging.ERROR) | ||
| def do_nothing(*args, **kwargs): | ||
| pass | ||
| def info_r(x, *y, **z): | ||
| x = str(x).rstrip() | ||
| logger.info(x, *y, **z) | ||
| def tb_events_to_csv( | ||
| event_file: str | Path, # path to a single event file (e.g. "output/runs/events.out.tfevents.xxxx") | ||
| output_file: str | Path, # path of the CSV file to write | ||
| purge_orphaned_data=True, | ||
| ): | ||
| """ | ||
| 지정한 TensorBoard 이벤트 로그(event_file)를 파싱하여 | ||
| CSV로 저장합니다. | ||
| - out_csv_path가 이미 존재하면 덮어씁니다. | ||
| - event_file에 여러 Scalar 태그가 존재할 경우, 모든 태그를 모아 | ||
| [wall_time, step, tag, value] 형태로 CSV 파일에 기록합니다. | ||
| """ | ||
| ea = event_accumulator.EventAccumulator( | ||
| str(event_file), | ||
| purge_orphaned_data=purge_orphaned_data # or False | ||
| ) | ||
| ea.Reload() | ||
| scalar_tags = ea.Tags().get("scalars", []) | ||
| with open(output_file, "w", newline="", encoding="utf-8") as f: | ||
| writer = csv.writer(f) | ||
| writer.writerow(["wall_time", "step", "tag", "value"]) # CSV 헤더 | ||
| for tag in scalar_tags: | ||
| # ea.Scalars(tag) returns every event recorded under that tag as a list | ||
| for event in ea.Scalars(tag): | ||
| writer.writerow([ | ||
| event.wall_time, # float (Unix time) | ||
| event.step, # int (global step) | ||
| tag, # e.g. "eval/loss", "train/loss" | ||
| event.value # the recorded scalar value | ||
| ]) | ||
| def convert_all_events_in_dir(log_dir: str | Path): | ||
| """ | ||
| Converts all TensorBoard event files in `log_dir` to CSV. | ||
| Each event file produces a separate CSV file. | ||
| """ | ||
| input_files = os.path.join(log_dir, "**/events.out.tfevents.*") | ||
| for input_file in files(input_files): | ||
| if not input_file.name.endswith(".csv"): | ||
| output_file = input_file.with_name(input_file.name + ".csv") | ||
| logger.info(f"Convert {input_file} to csv") | ||
| tb_events_to_csv(input_file, output_file) | ||
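A usage sketch; this assumes files() accepts a glob-bearing path, as the call above implies:

    from chrisbase.io import convert_all_events_in_dir

    # writes an events.out.tfevents.*.csv next to each event file under output/runs
    convert_all_events_in_dir("output/runs")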
| def strip_lines(text): | ||
| return "\n".join([line.strip() for line in text.splitlines()]) |
@@ -0,1 +1,2 @@ | ||
| import time | ||
| from datetime import datetime, timedelta, timezone | ||
@@ -9,3 +10,5 @@ | ||
| def now(fmt='[%m.%d %H:%M:%S]', prefix=None): | ||
| def now(fmt='[%m.%d %H:%M:%S]', prefix=None, delay=0) -> str: | ||
| if delay: | ||
| time.sleep(delay) | ||
| if prefix: | ||
@@ -31,3 +34,9 @@ return f"{prefix} {datetime.now().strftime(fmt)}" | ||
| def from_timestamp(stamp, fmt='%Y/%m/%d %H:%M:%S'): | ||
| def now_stamp(delay=0) -> float: | ||
| if delay: | ||
| time.sleep(delay) | ||
| return datetime.now().timestamp() | ||
| def from_timestamp(stamp, fmt='[%m.%d %H:%M:%S]'): | ||
| return datetime.fromtimestamp(stamp, tz=timezone.utc).astimezone().strftime(fmt) | ||
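from_timestamp's new default format matches now(), so a stamp taken with now_stamp renders identically. A sketch:

    from chrisbase.time import from_timestamp, now, now_stamp

    stamp = now_stamp()           # current Unix timestamp as a float
    print(now())                  # e.g. [05.31 14:02:07]
    print(from_timestamp(stamp))  # the same instant in the same default format
    print(now(delay=1.0))         # sleeps one second, then formats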
@@ -34,0 +43,0 @@ |
+22
-19
@@ -153,16 +153,24 @@ from __future__ import annotations | ||
| def to_dataframe(raw, index=None, exclude=None, columns=None, data_exclude=None, data_prefix=None): | ||
| if isinstance(raw, BaseModel): | ||
| if not columns: | ||
| columns = ["key", "value"] | ||
| raw = {(f"{data_prefix}.{k}" if data_prefix else k): v | ||
| for k, v in raw.model_dump(exclude=data_exclude).items()} | ||
| return to_dataframe(raw, index=index, exclude=exclude, columns=columns) | ||
| elif dataclasses.is_dataclass(raw): | ||
| if not columns: | ||
| columns = ["key", "value"] | ||
| raw = {(f"{data_prefix}.{k}" if data_prefix else k): v | ||
| for k, v in asdict(raw).items() | ||
| if not data_exclude or k not in data_exclude} | ||
| return to_dataframe(raw, index=index, exclude=exclude, columns=columns) | ||
| def to_dataframe(raw, index=None, exclude=None, columns=None, data_exclude=None, data_prefix=None, sorted_keys=False): | ||
| if not columns: | ||
| columns = ["key", "value"] | ||
| if dataclasses.is_dataclass(raw): | ||
| raw_dict = asdict(raw) | ||
| raw_dict = {(f"{data_prefix}.{k}" if data_prefix else k): raw_dict[k] | ||
| for k in (sorted(raw_dict.keys()) if sorted_keys else raw_dict.keys()) | ||
| if not data_exclude or k not in data_exclude} | ||
| return to_dataframe(raw_dict, index=index, exclude=exclude, columns=columns) | ||
| elif isinstance(raw, BaseModel): | ||
| raw_dict = raw.model_dump(exclude=data_exclude) | ||
| raw_dict = {(f"{data_prefix}.{k}" if data_prefix else k): raw_dict[k] | ||
| for k in (sorted(raw_dict.keys()) if sorted_keys else raw_dict.keys()) | ||
| if not data_exclude or k not in data_exclude} | ||
| return to_dataframe(raw_dict, index=index, exclude=exclude, columns=columns) | ||
| elif isinstance(raw, dict): | ||
| raw_dict = raw | ||
| raw_dict = {(f"{data_prefix}.{k}" if data_prefix else k): raw_dict[k] | ||
| for k in (sorted(raw_dict.keys()) if sorted_keys else raw_dict.keys()) | ||
| if not data_exclude or k not in data_exclude} | ||
| return pd.DataFrame.from_records(tuple(raw_dict.items()), | ||
| index=index, exclude=exclude, columns=columns) | ||
| elif isinstance(raw, (list, tuple)): | ||
@@ -174,7 +182,2 @@ if raw and isinstance(raw[0], dict): | ||
| index=index, exclude=exclude, columns=columns) | ||
| elif isinstance(raw, dict): | ||
| if not columns: | ||
| columns = ["key", "value"] | ||
| return pd.DataFrame.from_records(tuple(raw.items()), | ||
| index=index, exclude=exclude, columns=columns) | ||
| else: | ||
@@ -181,0 +184,0 @@ return pd.DataFrame.from_records(raw, index=index, exclude=exclude, columns=columns) |