Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a DemoInstallSign in
Socket

chrisbase

Package Overview
Dependencies
Maintainers
1
Versions
45
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

chrisbase - npm Package Compare versions

Comparing version
0.5.7
to
0.5.8
+2
-2
PKG-INFO

@@ -1,4 +0,4 @@

Metadata-Version: 2.1
Metadata-Version: 2.2
Name: chrisbase
Version: 0.5.7
Version: 0.5.8
Summary: Base library for python coding

@@ -5,0 +5,0 @@ Home-page: https://github.com/chrisjihee/chrisbase

[metadata]
name = chrisbase
version = 0.5.7
version = 0.5.8
author = Jihee Ryu

@@ -5,0 +5,0 @@ author_email = chrisjihee@naver.com

@@ -1,4 +0,4 @@

Metadata-Version: 2.1
Metadata-Version: 2.2
Name: chrisbase
Version: 0.5.7
Version: 0.5.8
Summary: Base library for python coding

@@ -5,0 +5,0 @@ Home-page: https://github.com/chrisjihee/chrisbase

@@ -0,1 +1,2 @@

import itertools
import json

@@ -6,3 +7,4 @@ import logging

import warnings
from dataclasses import dataclass, field
from dataclasses import dataclass
from dataclasses import field
from datetime import datetime

@@ -13,3 +15,4 @@ from datetime import timedelta

from pathlib import Path
from typing import List, Optional, Mapping, Any, Iterable, Tuple, ClassVar
from typing import Any, Callable, Optional, ClassVar
from typing import Iterable, List, Tuple, Mapping

@@ -24,6 +27,7 @@ import pandas as pd

from more_itertools import ichunked
from pydantic import BaseModel
from pydantic import BaseModel, Field, model_validator, ConfigDict
from pymongo import MongoClient
from typing_extensions import Self
from chrisbase.io import get_hostname, get_hostaddr, current_file, first_or, cwd, hr, flush_or, make_parent_dir, setup_unit_logger, setup_dual_logger, open_file, file_lines, to_table_lines, new_path, get_http_clients
from chrisbase.io import get_hostname, get_hostaddr, current_file, first_or, cwd, hr, flush_or, make_parent_dir, setup_unit_logger, setup_dual_logger, open_file, file_lines, new_path, get_http_clients, log_table, LoggingFormat
from chrisbase.time import now, str_delta

@@ -36,9 +40,141 @@ from chrisbase.util import tupled, SP, NO, to_dataframe

class AppTyper(typer.Typer):
def __init__(self):
def __init__(self, *args, **kwargs):
super().__init__(
*args,
add_completion=False,
pretty_exceptions_enable=False,
)
**kwargs)
@staticmethod
def run(function: Callable[..., Any], **kwargs) -> None:
app = AppTyper(**kwargs)
app.command()(function)
app()
class NewProjectEnv(BaseModel):
hostname: str = get_hostname()
hostaddr: str = get_hostaddr()
global_rank: int = Field(default=-1)
local_rank: int = Field(default=-1)
node_rank: int = Field(default=-1)
world_size: int = Field(default=-1)
time_stamp: str = Field(default=now('%m%d-%H%M%S'))
python_path: Path = Path(sys.executable).absolute()
current_dir: Path = Path().absolute()
current_file: Path = Path(sys.argv[0])
command_args: list[str] = sys.argv[1:]
output_home: str | Path = Field(default="output")
output_name: str | Path = Field(default=None)
run_version: str | int | Path | None = Field(default=None)
output_file: str | Path = Field(default=None)
logging_file: str | Path = Field(default=None)
logging_level: int = Field(default=logging.INFO)
logging_format: str = Field(default=LoggingFormat.BRIEF_00)
datetime_format: str = Field(default="[%m.%d %H:%M:%S]")
argument_file: str | Path = Field(default=None)
random_seed: int = Field(default=None)
max_workers: int = Field(default=1)
debugging: bool = Field(default=False)
output_dir: Path | None = Field(default=None, init=False)
@model_validator(mode='after')
def after(self) -> Self:
if self.output_home:
self.output_home = Path(self.output_home)
self.output_dir = self.output_home
if self.output_name:
self.output_dir = self.output_dir / self.output_name
if self.run_version:
self.output_dir = self.output_dir / str(self.run_version)
self.setup_logger(self.logging_level)
return self
def setup_logger(self, logging_level: int = logging.INFO):
if self.output_dir and self.logging_file:
setup_dual_logger(
level=logging_level, fmt=self.logging_format, datefmt=self.datetime_format, stream=sys.stdout,
filename=self.output_dir / self.logging_file,
)
else:
setup_unit_logger(
level=logging_level, fmt=self.logging_format, datefmt=self.datetime_format, stream=sys.stdout,
)
return self
class TimeChecker(BaseModel):
    """Tracks wall-clock start/end times and the elapsed duration of a run."""
    # Use default_factory so each instance samples its own construction time;
    # a bare `datetime.now()` default is evaluated once at import time and
    # shared by every instance.
    t1: datetime = Field(default_factory=datetime.now)
    t2: datetime = Field(default_factory=datetime.now)
    started: str | None = Field(default=None)
    settled: str | None = Field(default=None)
    elapsed: str | None = Field(default=None)

    def set_started(self):
        """Mark the start of a run; clears any previous end/elapsed values."""
        self.started = now()
        self.settled = None
        self.elapsed = None
        self.t1 = datetime.now()
        return self

    def set_settled(self):
        """Mark the end of a run and compute the elapsed duration string."""
        self.t2 = datetime.now()
        self.settled = now()
        self.elapsed = str_delta(self.t2 - self.t1)
        return self
class NewCommonArguments(BaseModel):
    """Common CLI arguments: the project environment plus a time checker."""
    env: NewProjectEnv = Field(default=None)
    time: TimeChecker = Field(default_factory=TimeChecker)

    def dataframe(self, columns=None) -> pd.DataFrame:
        # Two-column key/value frame combining "env.*" and "time.*" entries.
        if not columns:
            columns = [self.__class__.__name__, "value"]
        df = pd.concat([
            to_dataframe(columns=columns, raw=self.env, data_prefix="env"),
            to_dataframe(columns=columns, raw=self.time, data_prefix="time"),
        ]).reset_index(drop=True)
        return df

    def info_args(self, c="-", w=137):
        # Log the argument table at INFO level, bordered with char *c* at width *w*.
        log_table(logger, self.dataframe(), c=c, w=w, level=logging.INFO, tablefmt="tsv", bordered=True)
        return self

    def save_args(self, to: Path | str = None) -> Path | None:
        # Serialize the arguments as JSON to *to* (or output_dir/argument_file).
        # Returns the written path, or None when no destination is configured.
        if self.env.output_dir and self.env.argument_file:
            args_file = to if to else self.env.output_dir / self.env.argument_file
            args_json = self.model_dump_json(indent=2)
            make_parent_dir(args_file).write_text(args_json, encoding="utf-8")
            return args_file
        else:
            return None
class NewIOArguments(NewCommonArguments):
    """Common arguments extended with input/output options."""
    # Required because InputOption/OutputOption are not plain pydantic types.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    input: "InputOption" = Field(default=None)
    output: "OutputOption" = Field(default=None)
    option: BaseModel | None = None

    def __post_init__(self):
        # NOTE(review): pydantic's BaseModel does not define __post_init__,
        # so this super() call would raise AttributeError if ever invoked —
        # looks like a leftover from a dataclass-based version; confirm.
        super().__post_init__()

    def dataframe(self, columns=None) -> pd.DataFrame:
        # Extend the base env/time table with input.*, output.* and option.*
        # rows; nested file/table/index sub-options get their own prefixes.
        if not columns:
            columns = [self.__class__.__name__, "value"]
        return pd.concat([
            super().dataframe(columns=columns),
            to_dataframe(columns=columns, raw=self.input, data_prefix="input", data_exclude=["file", "table", "index"]),
            to_dataframe(columns=columns, raw=self.input.file, data_prefix="input.file") if self.input.file else None,
            to_dataframe(columns=columns, raw=self.input.table, data_prefix="input.table") if self.input.table else None,
            to_dataframe(columns=columns, raw=self.input.index, data_prefix="input.index") if self.input.index else None,
            to_dataframe(columns=columns, raw=self.output, data_prefix="output", data_exclude=["file", "table", "index"]),
            to_dataframe(columns=columns, raw=self.output.file, data_prefix="output.file") if self.output.file else None,
            to_dataframe(columns=columns, raw=self.output.table, data_prefix="output.table") if self.output.table else None,
            to_dataframe(columns=columns, raw=self.output.index, data_prefix="output.index") if self.output.index else None,
            to_dataframe(columns=columns, raw=self.option, data_prefix="option") if self.option else None,
        ]).reset_index(drop=True)
@dataclass

@@ -227,3 +363,3 @@ class TypedData(DataClassJsonMixin):

self.opt: FileOption = opt
self.path: Path | None = None
self.path: Path = self.opt.home / self.opt.name
self.fp: IOBase | None = None

@@ -506,3 +642,3 @@

hostaddr: str = field(init=False)
time_stamp: str = now('%m%d.%H%M%S')
time_stamp: str = now('%m%d-%H%M%S')
python_path: Path = field(init=False)

@@ -540,5 +676,4 @@ current_dir: Path = field(init=False)

def info_env(self):
for line in to_table_lines(to_dataframe(self)):
logger.info(line)
def info_env(self, c="-", w=137):
log_table(logger, to_dataframe(self), c=c, w=w, level=logging.INFO, tablefmt="tsv", bordered=True)
return self

@@ -573,24 +708,2 @@

@dataclass
class TimeChecker(ResultData):
    """Dataclass variant: tracks start/end wall-clock times and elapsed duration."""
    # default_factory so every instance samples its own time; a bare
    # `datetime.now()` default is evaluated once when the class is defined
    # and then shared by all instances.
    t1: datetime = field(default_factory=datetime.now)
    t2: datetime = field(default_factory=datetime.now)
    started: str | None = field(default=None)
    settled: str | None = field(default=None)
    elapsed: str | None = field(default=None)

    def set_started(self):
        """Mark the start of a run; clears any previous end/elapsed values."""
        self.started = now()
        self.settled = None
        self.elapsed = None
        self.t1 = datetime.now()
        return self

    def set_settled(self):
        """Mark the end of a run and compute the elapsed duration string."""
        self.t2 = datetime.now()
        self.settled = now()
        self.elapsed = str_delta(self.t2 - self.t1)
        return self
@dataclass
class CommonArguments(ArgumentGroupData):

@@ -613,5 +726,4 @@ tag = None

def info_args(self):
for line in to_table_lines(self.dataframe()):
logger.info(line)
def info_args(self, c="-", w=137):
log_table(logger, self.dataframe(), c=c, w=w, level=logging.INFO, tablefmt="tsv", bordered=True)
return self

@@ -656,2 +768,23 @@

@dataclass
class Counter:
    """Step-based counter built on a pair of monotonically advancing iterators.

    val() reads the current value without changing it by advancing *both*
    iterators once (their difference is invariant); inc() advances only the
    increment iterator by `step`.
    """
    step: int = 1
    # Per-instance counters. The previous class-level itertools.count objects
    # were shared by every Counter instance, so independent counters
    # interfered with each other.
    _incs: itertools.count = field(default_factory=itertools.count, init=False, repr=False)
    _base: itertools.count = field(default_factory=itertools.count, init=False, repr=False)

    def __str__(self):
        return f"Counter(val={self.val()}, step={self.step})"

    def __repr__(self):
        return f"Counter(val={self.val()}, step={self.step})"

    def inc(self) -> int:
        """Advance the counter by `step` and return the new value."""
        for _ in range(self.step):
            next(self._incs)
        return self.val()

    def val(self) -> int:
        """Return the current value (advances both internal iterators in lockstep)."""
        return next(self._incs) - next(self._base)
class RuntimeChecking:

@@ -671,4 +804,4 @@ def __init__(self, args: CommonArguments):

class JobTimer:
def __init__(self, name=None, args: CommonArguments = None, prefix=None, postfix=None,
verbose=True, mt=0, mb=0, pt=0, pb=0, rt=0, rb=0, rc='-', rw=137,
def __init__(self, name=None, args: CommonArguments | NewCommonArguments = None, prefix=None, postfix=None,
verbose=1, mt=0, mb=0, pt=0, pb=0, rt=0, rb=0, rc='-', rw=137,
flush_sec=0.1, mute_loggers=None, mute_warning=None):

@@ -688,3 +821,3 @@ self.name = name

self.rw: int = rw
self.verbose: bool = verbose
self.verbose: int = verbose
assert isinstance(mute_loggers, (type(None), str, list, tuple, set))

@@ -709,3 +842,3 @@ assert isinstance(mute_warning, (type(None), str, list, tuple, set))

flush_or(sys.stdout, sys.stderr, sec=self.flush_sec if self.flush_sec else None)
if self.verbose:
if self.verbose > 0:
if self.mt > 0:

@@ -728,5 +861,6 @@ for _ in range(self.mt):

self.args.time.set_started()
if self.verbose:
if self.verbose >= 1:
self.args.info_args(c='-', w=self.rw)
if self.verbose >= 2:
self.args.save_args()
self.args.info_args()
self.t1 = datetime.now()

@@ -743,7 +877,8 @@ return self

self.args.time.set_settled()
self.args.save_args()
if self.verbose >= 2:
self.args.save_args()
self.t2 = datetime.now()
self.td = self.t2 - self.t1
flush_or(sys.stdout, sys.stderr, sec=self.flush_sec if self.flush_sec else None)
if self.verbose:
if self.verbose > 0:
if self.pb > 0:

@@ -774,1 +909,13 @@ for _ in range(self.pb):

exit(22)
def find_sublist_range(haystack: List[Any], sublist: List[Any], case_sensitive: bool = True) -> List[int]:
    """Locate the first occurrence of *sublist* inside *haystack*.

    Returns the list of indices covered by the match, or an empty list when
    no match is found. With case_sensitive=False, string elements are
    compared after lower-casing; non-string elements compare as-is.
    """
    def _fold(seq):
        return [item.lower() if isinstance(item, str) else item for item in seq]

    if not case_sensitive:
        haystack, sublist = _fold(haystack), _fold(sublist)
    window = len(sublist)
    for start in range(len(haystack) - window + 1):
        if haystack[start:start + window] == sublist:
            return list(range(start, start + window))
    return []
import bz2
import csv
import gzip

@@ -6,2 +7,3 @@ import json

import os
import re
import shutil

@@ -18,3 +20,3 @@ import socket

from time import sleep
from typing import Iterable
from typing import Iterable, List

@@ -25,2 +27,3 @@ import httpx

from tabulate import tabulate
from tensorboard.backend.event_processing import event_accumulator

@@ -44,24 +47,61 @@ from chrisbase.time import from_timestamp

BRIEF_20: str = ' ┇ '.join(['%(asctime)s', '%(name)20s', '%(message)s'])
CHECK_00: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(message)s'])
CHECK_12: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)12s', '%(message)s'])
CHECK_16: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)16s', '%(message)s'])
CHECK_20: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)20s', '%(message)s'])
CHECK_24: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)24s', '%(message)s'])
CHECK_28: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)28s', '%(message)s'])
CHECK_32: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)32s', '%(message)s'])
CHECK_36: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)36s', '%(message)s'])
CHECK_40: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)40s', '%(message)s'])
CHECK_48: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-8s', '%(name)48s', '%(message)s'])
DEBUG_00: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(message)s'])
DEBUG_12: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)12s', '%(message)s'])
DEBUG_16: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)16s', '%(message)s'])
DEBUG_20: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)20s', '%(message)s'])
DEBUG_24: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)24s', '%(message)s'])
DEBUG_28: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)28s', '%(message)s'])
DEBUG_32: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)32s', '%(message)s'])
DEBUG_36: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)36s', '%(message)s'])
DEBUG_40: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)40s', '%(message)s'])
DEBUG_48: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-8s', '%(name)48s', '%(message)s'])
CHECK_00: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(message)s'])
CHECK_12: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s'])
CHECK_16: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s'])
CHECK_20: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s'])
CHECK_24: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s'])
CHECK_28: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s'])
CHECK_32: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s'])
CHECK_36: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s'])
CHECK_40: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s'])
CHECK_48: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s'])
TRACE_12: str = ' ┇ '.join(['%(asctime)s', '%(filename)12s:%(lineno)-4d', '%(message)s'])
TRACE_16: str = ' ┇ '.join(['%(asctime)s', '%(filename)16s:%(lineno)-4d', '%(message)s'])
TRACE_20: str = ' ┇ '.join(['%(asctime)s', '%(filename)20s:%(lineno)-4d', '%(message)s'])
TRACE_24: str = ' ┇ '.join(['%(asctime)s', '%(filename)24s:%(lineno)-4d', '%(message)s'])
TRACE_28: str = ' ┇ '.join(['%(asctime)s', '%(filename)28s:%(lineno)-4d', '%(message)s'])
TRACE_32: str = ' ┇ '.join(['%(asctime)s', '%(filename)32s:%(lineno)-4d', '%(message)s'])
TRACE_36: str = ' ┇ '.join(['%(asctime)s', '%(filename)36s:%(lineno)-4d', '%(message)s'])
TRACE_40: str = ' ┇ '.join(['%(asctime)s', '%(filename)40s:%(lineno)-4d', '%(message)s'])
DEBUG_00: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(message)s'])
DEBUG_12: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s'])
DEBUG_16: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s'])
DEBUG_20: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s'])
DEBUG_24: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s'])
DEBUG_28: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s'])
DEBUG_32: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s'])
DEBUG_36: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s'])
DEBUG_40: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s'])
DEBUG_48: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s'])
class LoggerWriter:
    """File-like adapter that redirects writes to a logger.

    :param logger: Target logger that receives forwarded messages.
    :param level: Logging level applied to every message (default: logging.INFO).
    """

    def __init__(self, logger: logging.Logger, level: int = logging.INFO):
        self.logger = logger
        self.level = level

    def write(self, msg: str):
        """Forward *msg* to the logger; blank/whitespace-only writes are dropped."""
        text = msg.rstrip()
        if text:
            self.logger.log(self.level, text)

    def flush(self):
        """No-op: messages are emitted immediately, nothing is buffered."""
class MuteStd:

@@ -162,4 +202,4 @@ def __init__(self, out=None, err=None, flush_sec=0.0, mute_warning=None, mute_logger=None):

def hr(c="=", w=137, t=0, b=0, title=''):
# w=137-26 : %(asctime)s %(levelname)-8s %(message)s
# w=137-47 : %(asctime)s %(levelname)-8s %(filename)15s:%(lineno)-4d %(message)s
# w=137-26 : %(asctime)s %(levelname)-7s %(message)s
# w=137-47 : %(asctime)s %(levelname)-7s %(filename)15s:%(lineno)-4d %(message)s
# w=137 (for ipynb on Chrome using D2Coding 13pt) with scroll

@@ -202,3 +242,3 @@ # w=139 (for ipynb on Chrome using D2Coding 13pt) without scroll

def str_table(tabular_data, headers=(), tablefmt="pipe", showindex="default", transposed_df=False, **kwargs):
def str_table(tabular_data, headers=(), tablefmt="plain", showindex="default", transposed_df=False, **kwargs):
if not headers and isinstance(tabular_data, pd.DataFrame):

@@ -208,6 +248,11 @@ if showindex is True or showindex == "default" or showindex == "always" or \

if transposed_df:
index_header = '#'
if tabular_data.index.name:
index_header = tabular_data.index.name
elif tabular_data.index.names:
index_header = ' '.join(tabular_data.index.names)
if isinstance(tabular_data.columns, pd.RangeIndex):
headers = ['key'] + list(map(str(range(1, len(tabular_data.columns) + 1))))
headers = [index_header] + list(map(str(range(1, len(tabular_data.columns) + 1))))
else:
headers = ['key'] + list(tabular_data.columns)
headers = [index_header] + list(tabular_data.columns)
else:

@@ -221,11 +266,19 @@ headers = ['#'] + list(tabular_data.columns)

def to_table_lines(*args, tablefmt="presto", border_idx=1, **kwargs):
def to_table_lines(*args, c="-", w=137, left='', tablefmt="plain", header_idx=0, bordered=False, **kwargs):
table = str_table(*args, **kwargs, tablefmt=tablefmt)
lines = table.splitlines()
border = lines[border_idx]
lines = [border] + lines + [border]
for line in lines:
if bordered:
border = hr(c=c, w=w)
lines = ([border] + lines[:header_idx + 1] +
[border] + lines[header_idx + 1:] +
[border])
for line in lines if not left else [left + line for line in lines]:
yield line
def log_table(my_logger, *args, c="-", w=137, level=logging.INFO, **kwargs):
    """Render a table via to_table_lines and emit each line through *my_logger* at *level*."""
    rendered = to_table_lines(*args, **kwargs, c=c, w=w)
    for row in rendered:
        my_logger.log(level, row)
def file_table(*args, file=sys_stdout, **kwargs):

@@ -296,3 +349,3 @@ print(str_table(*args, **kwargs), file=file)

def paths(path, accept_fn=lambda _: True):
def paths(path, accept_fn=lambda _: True) -> List[Path]:
assert path, f"No path: {path}"

@@ -307,11 +360,15 @@ path = Path(path)

def dirs(path):
def dirs(path) -> List[Path]:
return paths(path, accept_fn=lambda x: x.is_dir())
def files(path):
def files(path) -> List[Path]:
return paths(path, accept_fn=lambda x: x.is_file())
def glob_dirs(path, glob: str):
def non_empty_files(path) -> List[Path]:
    # Files under *path* whose size is greater than zero bytes.
    return paths(path, accept_fn=lambda x: x.is_file() and x.stat().st_size > 0)
def glob_dirs(path, glob: str) -> List[Path]:
path = Path(path)

@@ -321,3 +378,3 @@ return sorted([x for x in path.glob(glob) if x.is_dir()])

def glob_files(path, glob: str):
def glob_files(path, glob: str) -> List[Path]:
path = Path(path)

@@ -384,2 +441,6 @@ return sorted([x for x in path.glob(glob) if x.is_file()])

def all_line_list(path, mini=None):
    # Materialized list form of all_lines() (which yields lazily).
    return list(all_lines(path, mini))
def tsv_lines(*args, **kwargs):

@@ -393,5 +454,21 @@ return map(lambda x: x.split('\t'), all_lines(*args, **kwargs))

def text_blocks(path) -> Iterable[List[str]]:
    """Yield blocks of consecutive non-empty lines from a UTF-8 text file.

    Empty lines separate blocks and are not included in any block; a trailing
    block without a final blank line is still yielded.
    """
    block: List[str] = []
    with open(path, mode="r", encoding="utf-8") as f:
        for raw in f:
            # Strip only the trailing newline. The previous `line[:-1]`
            # chopped the last character of a final line that had no newline,
            # corrupting or dropping the last block.
            line = raw.removesuffix("\n")
            if not line:
                if block:
                    yield block
                block = []
            else:
                block.append(line)
    if block:
        yield block
def new_path(path, post=None, pre=None, sep='-') -> Path:
    """Return *path* with optional *pre*/*post* tags joined to its stem by *sep*.

    Only None disables a tag, so falsy-but-meaningful values (0, "") are still
    applied. All suffixes of the original path are preserved.
    """
    path = Path(path)
    # The source contained both diff variants of this assignment back to back;
    # only the later `is not None` variant was live, so it is kept.
    stem = (f"{pre}{sep}" if pre is not None else "") + path.stem + (f"{sep}{post}" if post is not None else "")
    return path.parent / (stem + NO.join(path.suffixes))

@@ -471,2 +548,18 @@

# define function to normalize simple list in json
def normalize_simple_list_in_json(json_input):
    """Collapse whitespace inside simple (non-nested) JSON lists.

    A "simple" list contains no nested brackets. Pretty-printed lists such as
    "[\\n  1,\\n  2\\n]" become "[1, 2]"; text outside lists is left untouched
    and inputs without any list are returned unchanged.
    """
    pattern = re.compile(r"\[[^\[\]]+?]")
    pieces = []
    pre_end = 0
    for m in pattern.finditer(json_input):
        pieces.append(json_input[pre_end:m.start()])
        # Normalize only the bracket interior. The previous code re-wrapped
        # the full match (brackets included) and relied on removeprefix("[ ")/
        # removesuffix(" ]"), which doubled the brackets for compact inputs
        # like "[1, 2]" that have no space right after "[".
        inner = " ".join(m.group()[1:-1].split())
        pieces.append(f"[{inner}]")
        pre_end = m.end()
    pieces.append(json_input[pre_end:])
    return ''.join(pieces)
def open_file(path: str | Path, mode: str = "rb", **kwargs) -> IOBase:

@@ -736,1 +829,82 @@ file = Path(path)

logger.debug(f"logging.getLogger({x.name:<20s}) = Logger(level={x.level}, handlers={x.handlers}, disabled={x.disabled}, propagate={x.propagate}, parent={x.parent})")
def set_verbosity_debug(*names):
    """Set every named logger to DEBUG level."""
    for logger_name in names:
        logging.getLogger(logger_name).setLevel(logging.DEBUG)
def set_verbosity_info(*names):
    """Set every named logger to INFO level."""
    for logger_name in names:
        logging.getLogger(logger_name).setLevel(logging.INFO)
def set_verbosity_warning(*names):
    """Set every named logger to WARNING level."""
    for logger_name in names:
        logging.getLogger(logger_name).setLevel(logging.WARNING)
def set_verbosity_error(*names):
    """Set every named logger to ERROR level."""
    for logger_name in names:
        logging.getLogger(logger_name).setLevel(logging.ERROR)
def do_nothing(*args, **kwargs):
    """No-op placeholder: accepts and ignores any positional or keyword arguments."""
    return None
def info_r(x, *y, **z):
    # Log *x* at INFO level with trailing whitespace removed; remaining
    # positional/keyword arguments are passed straight through to logger.info
    # (e.g. lazy %-style formatting args).
    x = str(x).rstrip()
    logger.info(x, *y, **z)
def tb_events_to_csv(
        event_file: str | Path,  # path to a single event file (e.g. "output/runs/events.out.tfevents.xxxx")
        output_file: str | Path,  # path of the CSV file to write
        purge_orphaned_data=True,
):
    """
    Parse the given TensorBoard event log (event_file) and export it to CSV.
    - Overwrites output_file if it already exists.
    - When event_file contains multiple scalar tags, all tags are collected
      and written as rows of [wall_time, step, tag, value].
    """
    ea = event_accumulator.EventAccumulator(
        str(event_file),
        purge_orphaned_data=purge_orphaned_data  # or False
    )
    ea.Reload()
    scalar_tags = ea.Tags().get("scalars", [])
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["wall_time", "step", "tag", "value"])  # CSV header
        for tag in scalar_tags:
            # ea.Scalars(tag) returns every event logged under this tag as a list
            for event in ea.Scalars(tag):
                writer.writerow([
                    event.wall_time,  # float (Unix time)
                    event.step,  # int (global step)
                    tag,  # e.g. "eval/loss", "train/loss"
                    event.value  # recorded scalar value
                ])
def convert_all_events_in_dir(log_dir: str | Path):
    """
    Converts all TensorBoard event files in `log_dir` to CSV.
    Each event file produces a separate CSV file.
    """
    # NOTE(review): a glob *pattern* string is handed to files(), which
    # elsewhere appears to take a directory path — confirm whether
    # glob_files(log_dir, "**/events.out.tfevents.*") was intended.
    input_files = os.path.join(log_dir, "**/events.out.tfevents.*")
    for input_file in files(input_files):
        # Skip files already converted (pattern ends with '*', so a prior
        # ".csv" output could match it).
        if not input_file.name.endswith(".csv"):
            output_file = input_file.with_name(input_file.name + ".csv")
            logger.info(f"Convert {input_file} to csv")
            tb_events_to_csv(input_file, output_file)
def strip_lines(text):
    """Return *text* with leading/trailing whitespace removed from each line."""
    stripped = (line.strip() for line in text.splitlines())
    return "\n".join(stripped)

@@ -0,1 +1,2 @@

import time
from datetime import datetime, timedelta, timezone

@@ -9,3 +10,5 @@

def now(fmt='[%m.%d %H:%M:%S]', prefix=None):
def now(fmt='[%m.%d %H:%M:%S]', prefix=None, delay=0) -> str:
if delay:
time.sleep(delay)
if prefix:

@@ -31,3 +34,9 @@ return f"{prefix} {datetime.now().strftime(fmt)}"

def from_timestamp(stamp, fmt='%Y/%m/%d %H:%M:%S'):
def now_stamp(delay=0) -> float:
    """Return the current local time as a Unix timestamp.

    :param delay: optional number of seconds to sleep before sampling.
    """
    if delay:
        time.sleep(delay)
    current = datetime.now()
    return current.timestamp()
def from_timestamp(stamp, fmt='[%m.%d %H:%M:%S]'):
    """Format a Unix timestamp as a local-time string using *fmt*."""
    local_dt = datetime.fromtimestamp(stamp, tz=timezone.utc).astimezone()
    return local_dt.strftime(fmt)

@@ -34,0 +43,0 @@

@@ -153,16 +153,24 @@ from __future__ import annotations

def to_dataframe(raw, index=None, exclude=None, columns=None, data_exclude=None, data_prefix=None):
if isinstance(raw, BaseModel):
if not columns:
columns = ["key", "value"]
raw = {(f"{data_prefix}.{k}" if data_prefix else k): v
for k, v in raw.model_dump(exclude=data_exclude).items()}
return to_dataframe(raw, index=index, exclude=exclude, columns=columns)
elif dataclasses.is_dataclass(raw):
if not columns:
columns = ["key", "value"]
raw = {(f"{data_prefix}.{k}" if data_prefix else k): v
for k, v in asdict(raw).items()
if not data_exclude or k not in data_exclude}
return to_dataframe(raw, index=index, exclude=exclude, columns=columns)
def to_dataframe(raw, index=None, exclude=None, columns=None, data_exclude=None, data_prefix=None, sorted_keys=False):
if not columns:
columns = ["key", "value"]
if dataclasses.is_dataclass(raw):
raw_dict = asdict(raw)
raw_dict = {(f"{data_prefix}.{k}" if data_prefix else k): raw_dict[k]
for k in (sorted(raw_dict.keys()) if sorted_keys else raw_dict.keys())
if not data_exclude or k not in data_exclude}
return to_dataframe(raw_dict, index=index, exclude=exclude, columns=columns)
elif isinstance(raw, BaseModel):
raw_dict = raw.model_dump(exclude=data_exclude)
raw_dict = {(f"{data_prefix}.{k}" if data_prefix else k): raw_dict[k]
for k in (sorted(raw_dict.keys()) if sorted_keys else raw_dict.keys())
if not data_exclude or k not in data_exclude}
return to_dataframe(raw_dict, index=index, exclude=exclude, columns=columns)
elif isinstance(raw, dict):
raw_dict = raw
raw_dict = {(f"{data_prefix}.{k}" if data_prefix else k): raw_dict[k]
for k in (sorted(raw_dict.keys()) if sorted_keys else raw_dict.keys())
if not data_exclude or k not in data_exclude}
return pd.DataFrame.from_records(tuple(raw_dict.items()),
index=index, exclude=exclude, columns=columns)
elif isinstance(raw, (list, tuple)):

@@ -174,7 +182,2 @@ if raw and isinstance(raw[0], dict):

index=index, exclude=exclude, columns=columns)
elif isinstance(raw, dict):
if not columns:
columns = ["key", "value"]
return pd.DataFrame.from_records(tuple(raw.items()),
index=index, exclude=exclude, columns=columns)
else:

@@ -181,0 +184,0 @@ return pd.DataFrame.from_records(raw, index=index, exclude=exclude, columns=columns)