chrisbase
Advanced tools
+15
-11
| Metadata-Version: 2.4 | ||
| Name: chrisbase | ||
| Version: 0.5.9 | ||
| Version: 0.6.0 | ||
| Summary: Base library for python coding | ||
@@ -20,21 +20,25 @@ Home-page: https://github.com/chrisjihee/chrisbase | ||
| Requires-Dist: typer | ||
| Requires-Dist: hydra-core | ||
| Requires-Dist: pydantic | ||
| Requires-Dist: omegaconf | ||
| Requires-Dist: dataclasses | ||
| Requires-Dist: dataclasses-json | ||
| Requires-Dist: httpx | ||
| Requires-Dist: ipaddress | ||
| Requires-Dist: netifaces | ||
| Requires-Dist: numpy | ||
| Requires-Dist: scipy | ||
| Requires-Dist: httpx | ||
| Requires-Dist: pandas | ||
| Requires-Dist: pymongo | ||
| Requires-Dist: pydantic | ||
| Requires-Dist: openpyxl | ||
| Requires-Dist: matplotlib | ||
| Requires-Dist: scikit-learn | ||
| Requires-Dist: tabulate | ||
| Requires-Dist: ipynbname | ||
| Requires-Dist: ipaddress | ||
| Requires-Dist: netifaces | ||
| Requires-Dist: matplotlib | ||
| Requires-Dist: filelock | ||
| Requires-Dist: openpyxl | ||
| Requires-Dist: pymongo | ||
| Requires-Dist: sqlalchemy | ||
| Requires-Dist: dataclasses | ||
| Requires-Dist: dataclasses-json | ||
| Requires-Dist: elasticsearch | ||
| Requires-Dist: more-itertools | ||
| Requires-Dist: scikit-learn | ||
| Requires-Dist: seqeval | ||
| Requires-Dist: accelerate | ||
| Dynamic: license-file | ||
@@ -41,0 +45,0 @@ |
+15
-11
| [metadata] | ||
| name = chrisbase | ||
| version = 0.5.9 | ||
| version = 0.6.0 | ||
| author = Jihee Ryu | ||
@@ -29,21 +29,25 @@ author_email = chrisjihee@naver.com | ||
| typer | ||
| hydra-core | ||
| pydantic | ||
| omegaconf | ||
| dataclasses | ||
| dataclasses-json | ||
| httpx | ||
| ipaddress | ||
| netifaces | ||
| numpy | ||
| scipy | ||
| httpx | ||
| pandas | ||
| pymongo | ||
| pydantic | ||
| openpyxl | ||
| matplotlib | ||
| scikit-learn | ||
| tabulate | ||
| ipynbname | ||
| ipaddress | ||
| netifaces | ||
| matplotlib | ||
| filelock | ||
| openpyxl | ||
| pymongo | ||
| sqlalchemy | ||
| dataclasses | ||
| dataclasses-json | ||
| elasticsearch | ||
| more-itertools | ||
| scikit-learn | ||
| seqeval | ||
| accelerate | ||
@@ -50,0 +54,0 @@ [options.packages.find] |
| Metadata-Version: 2.4 | ||
| Name: chrisbase | ||
| Version: 0.5.9 | ||
| Version: 0.6.0 | ||
| Summary: Base library for python coding | ||
@@ -20,21 +20,25 @@ Home-page: https://github.com/chrisjihee/chrisbase | ||
| Requires-Dist: typer | ||
| Requires-Dist: hydra-core | ||
| Requires-Dist: pydantic | ||
| Requires-Dist: omegaconf | ||
| Requires-Dist: dataclasses | ||
| Requires-Dist: dataclasses-json | ||
| Requires-Dist: httpx | ||
| Requires-Dist: ipaddress | ||
| Requires-Dist: netifaces | ||
| Requires-Dist: numpy | ||
| Requires-Dist: scipy | ||
| Requires-Dist: httpx | ||
| Requires-Dist: pandas | ||
| Requires-Dist: pymongo | ||
| Requires-Dist: pydantic | ||
| Requires-Dist: openpyxl | ||
| Requires-Dist: matplotlib | ||
| Requires-Dist: scikit-learn | ||
| Requires-Dist: tabulate | ||
| Requires-Dist: ipynbname | ||
| Requires-Dist: ipaddress | ||
| Requires-Dist: netifaces | ||
| Requires-Dist: matplotlib | ||
| Requires-Dist: filelock | ||
| Requires-Dist: openpyxl | ||
| Requires-Dist: pymongo | ||
| Requires-Dist: sqlalchemy | ||
| Requires-Dist: dataclasses | ||
| Requires-Dist: dataclasses-json | ||
| Requires-Dist: elasticsearch | ||
| Requires-Dist: more-itertools | ||
| Requires-Dist: scikit-learn | ||
| Requires-Dist: seqeval | ||
| Requires-Dist: accelerate | ||
| Dynamic: license-file | ||
@@ -41,0 +45,0 @@ |
| tqdm | ||
| typer | ||
| hydra-core | ||
| pydantic | ||
| omegaconf | ||
| dataclasses | ||
| dataclasses-json | ||
| httpx | ||
| ipaddress | ||
| netifaces | ||
| numpy | ||
| scipy | ||
| httpx | ||
| pandas | ||
| pymongo | ||
| pydantic | ||
| openpyxl | ||
| matplotlib | ||
| scikit-learn | ||
| tabulate | ||
| ipynbname | ||
| ipaddress | ||
| netifaces | ||
| matplotlib | ||
| filelock | ||
| openpyxl | ||
| pymongo | ||
| sqlalchemy | ||
| dataclasses | ||
| dataclasses-json | ||
| elasticsearch | ||
| more-itertools | ||
| scikit-learn | ||
| seqeval | ||
| accelerate |
@@ -9,3 +9,2 @@ LICENSE | ||
| src/chrisbase/io.py | ||
| src/chrisbase/morp.py | ||
| src/chrisbase/net.py | ||
@@ -12,0 +11,0 @@ src/chrisbase/time.py |
+11
-0
@@ -0,1 +1,3 @@ | ||
| import subprocess | ||
| from typer import Typer | ||
@@ -6,2 +8,11 @@ | ||
def run_command(cmd):
    """Run a shell-style command string, echoing it between banner lines.

    The command is tokenized with shlex.split so quoted arguments stay
    intact (the previous str.split broke arguments containing spaces).
    Returns None; the subprocess exit status is not checked (best effort).
    """
    import shlex  # local import keeps the fix self-contained
    argv = shlex.split(cmd.strip())
    print("*" * 120)
    print("[COMMAND]", " ".join(argv))
    print("*" * 120)
    subprocess.run(argv)
    print("\n" * 3)
| @app.command() | ||
@@ -8,0 +19,0 @@ def hello(): |
+84
-29
@@ -7,2 +7,3 @@ import itertools | ||
| import warnings | ||
| from contextlib import contextmanager | ||
| from dataclasses import dataclass | ||
@@ -18,2 +19,3 @@ from dataclasses import field | ||
| import datasets | ||
| import pandas as pd | ||
@@ -24,14 +26,14 @@ import pymongo.collection | ||
| import typer | ||
| from chrisbase.io import get_hostname, get_hostaddr, current_file, first_or, cwd, hr, flush_or, make_parent_dir, setup_unit_logger, setup_dual_logger, open_file, file_lines, new_path, get_http_clients, log_table, LoggingFormat, to_yaml | ||
| from chrisbase.time import now, str_delta | ||
| from chrisbase.util import tupled, SP, NO, to_dataframe | ||
| from dataclasses_json import DataClassJsonMixin | ||
| from elasticsearch import Elasticsearch | ||
| from more_itertools import ichunked | ||
| from pydantic import BaseModel, Field, model_validator, ConfigDict | ||
| from omegaconf import DictConfig, OmegaConf | ||
| from pydantic import BaseModel, Field, ConfigDict, model_validator, field_validator | ||
| from pymongo import MongoClient | ||
| from transformers import set_seed | ||
| from typing_extensions import Self | ||
| from chrisbase.io import get_hostname, get_hostaddr, current_file, first_or, cwd, hr, flush_or, make_parent_dir, setup_unit_logger, setup_dual_logger, open_file, file_lines, new_path, get_http_clients, log_table, LoggingFormat | ||
| from chrisbase.time import now, str_delta | ||
| from chrisbase.util import tupled, SP, NO, to_dataframe | ||
| from transformers import set_seed | ||
| logger = logging.getLogger(__name__) | ||
@@ -50,8 +52,44 @@ | ||
def run(*functions: Callable[..., Any], **kwargs) -> None:
    """Register each given function as a Typer command (forwarding **kwargs
    to ``command()``) and launch the CLI application."""
    app = AppTyper()
    for fn in functions:
        app.command(**kwargs)(fn)
    app()
@contextmanager
def temporary_mutable_conf(*cfgs):
    """Make the given OmegaConf objects writable for the duration of the
    with-block, restoring each object's original readonly flag on exit."""
    saved_flags = [OmegaConf.is_readonly(cfg) for cfg in cfgs]
    for cfg in cfgs:
        OmegaConf.set_readonly(cfg, False)
    try:
        yield  # single yield: plain context-manager protocol
    finally:
        # restore every config to the readonly state it had on entry
        for cfg, was_readonly in zip(cfgs, saved_flags):
            OmegaConf.set_readonly(cfg, was_readonly)
@contextmanager
def disable_datasets_progress():
    """Suppress the HuggingFace ``datasets`` progress bar inside the
    with-block, re-enabling it afterwards even if the body raises."""
    datasets.disable_progress_bar()
    try:
        yield
    finally:
        datasets.enable_progress_bar()
| class NewProjectEnv(BaseModel): | ||
@@ -75,3 +113,3 @@ hostname: str = get_hostname() | ||
| logging_level: int = Field(default=logging.INFO) | ||
| logging_format: str = Field(default=LoggingFormat.BRIEF_00) | ||
| logging_format: LoggingFormat = Field(default=LoggingFormat.BRIEF_00) | ||
| datetime_format: str = Field(default="[%m.%d %H:%M:%S]") | ||
@@ -84,2 +122,17 @@ argument_file: str | Path = Field(default=None) | ||
| @field_validator('logging_format', mode='before') | ||
| def validate_logging_format(cls, v): | ||
| # 만약 입력값이 문자열이라면, 해당 문자열이 LoggingFormat의 멤버 이름과 일치하는지 확인 | ||
| if isinstance(v, str): | ||
| try: | ||
| # 간단하게 Enum 멤버 이름으로 변환 | ||
| return LoggingFormat[v] | ||
| except KeyError: | ||
| # 만약 Enum 멤버 이름이 아니라면, 실제 값과 일치하는지 체크 | ||
| for member in LoggingFormat: | ||
| if v == member.value: | ||
| return member | ||
| raise ValueError(f"Invalid logging_format: {v}.") | ||
| return v | ||
| @model_validator(mode='after') | ||
@@ -103,3 +156,6 @@ def after(self) -> Self: | ||
| setup_dual_logger( | ||
| level=logging_level, fmt=self.logging_format, datefmt=self.datetime_format, stream=sys.stdout, | ||
| level=logging_level, | ||
| fmt=self.logging_format.value, | ||
| datefmt=self.datetime_format, | ||
| stream=sys.stdout, | ||
| filename=self.output_dir / self.logging_file, | ||
@@ -109,3 +165,6 @@ ) | ||
| setup_unit_logger( | ||
| level=logging_level, fmt=self.logging_format, datefmt=self.datetime_format, stream=sys.stdout, | ||
| level=logging_level, | ||
| fmt=self.logging_format.value, | ||
| datefmt=self.datetime_format, | ||
| stream=sys.stdout, | ||
| ) | ||
@@ -793,17 +852,4 @@ return self | ||
| class RuntimeChecking: | ||
| def __init__(self, args: CommonArguments): | ||
| self.args: CommonArguments = args | ||
| def __enter__(self): | ||
| self.args.time.set_started() | ||
| self.args.save_args() | ||
| def __exit__(self, *exc_info): | ||
| self.args.time.set_settled() | ||
| self.args.save_args() | ||
| class JobTimer: | ||
| def __init__(self, name=None, args: CommonArguments | NewCommonArguments = None, prefix=None, postfix=None, | ||
| def __init__(self, name=None, args: CommonArguments | NewCommonArguments | DictConfig = None, prefix=None, postfix=None, | ||
| verbose=1, mt=0, mb=0, pt=0, pb=0, rt=0, rb=0, rc='-', rw=137, | ||
@@ -861,7 +907,15 @@ flush_sec=0.1, mute_loggers=None, mute_warning=None): | ||
| if self.args: | ||
| self.args.time.set_started() | ||
| if hasattr(self.args, "time"): | ||
| self.args.time.set_started() | ||
| if self.verbose >= 1: | ||
| self.args.info_args(c='-', w=self.rw) | ||
| if hasattr(self.args, "info_args"): | ||
| self.args.info_args(c="-", w=self.rw) | ||
| else: | ||
| yaml_str = to_yaml(self.args, resolve=True, width=4096).rstrip() | ||
| logger.info("[args]") | ||
| sum(logger.info(f" {l}") or 1 for l in yaml_str.splitlines()) | ||
| logger.info(hr(c=self.rc, w=self.rw)) | ||
| if self.verbose >= 2: | ||
| self.args.save_args() | ||
| if hasattr(self.args, "save_args"): | ||
| self.args.save_args() | ||
| self.t1 = datetime.now() | ||
@@ -877,3 +931,4 @@ return self | ||
| if self.args: | ||
| self.args.time.set_settled() | ||
| if hasattr(self.args, "time"): | ||
| self.args.time.set_settled() | ||
| if self.verbose >= 2: | ||
@@ -880,0 +935,0 @@ self.args.save_args() |
+87
-42
@@ -24,8 +24,10 @@ import bz2 | ||
| import pandas as pd | ||
| import yaml | ||
| from chrisbase.time import from_timestamp | ||
| from chrisbase.util import tupled, OX | ||
| from omegaconf import OmegaConf | ||
| from omegaconf._utils import get_omega_conf_dumper | ||
| from tabulate import tabulate | ||
| from tensorboard.backend.event_processing import event_accumulator | ||
| from chrisbase.time import from_timestamp | ||
| from chrisbase.util import tupled, NO, OX | ||
| logger = logging.getLogger(__name__) | ||
@@ -35,42 +37,44 @@ sys_stdout = sys.stdout | ||
| from enum import Enum | ||
| class LoggingFormat: | ||
| PRINT_00: str = ' ┇ '.join(['%(message)s']) | ||
| PRINT_12: str = ' ┇ '.join(['%(name)12s', '%(message)s']) | ||
| PRINT_16: str = ' ┇ '.join(['%(name)16s', '%(message)s']) | ||
| PRINT_20: str = ' ┇ '.join(['%(name)20s', '%(message)s']) | ||
| BRIEF_00: str = ' ┇ '.join(['%(asctime)s', '%(message)s']) | ||
| BRIEF_12: str = ' ┇ '.join(['%(asctime)s', '%(name)12s', '%(message)s']) | ||
| BRIEF_16: str = ' ┇ '.join(['%(asctime)s', '%(name)16s', '%(message)s']) | ||
| BRIEF_20: str = ' ┇ '.join(['%(asctime)s', '%(name)20s', '%(message)s']) | ||
| CHECK_00: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(message)s']) | ||
| CHECK_12: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s']) | ||
| CHECK_16: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s']) | ||
| CHECK_20: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s']) | ||
| CHECK_24: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s']) | ||
| CHECK_28: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s']) | ||
| CHECK_32: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s']) | ||
| CHECK_36: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s']) | ||
| CHECK_40: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s']) | ||
| CHECK_48: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s']) | ||
| TRACE_12: str = ' ┇ '.join(['%(asctime)s', '%(filename)12s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_16: str = ' ┇ '.join(['%(asctime)s', '%(filename)16s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_20: str = ' ┇ '.join(['%(asctime)s', '%(filename)20s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_24: str = ' ┇ '.join(['%(asctime)s', '%(filename)24s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_28: str = ' ┇ '.join(['%(asctime)s', '%(filename)28s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_32: str = ' ┇ '.join(['%(asctime)s', '%(filename)32s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_36: str = ' ┇ '.join(['%(asctime)s', '%(filename)36s:%(lineno)-4d', '%(message)s']) | ||
| TRACE_40: str = ' ┇ '.join(['%(asctime)s', '%(filename)40s:%(lineno)-4d', '%(message)s']) | ||
| DEBUG_00: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(message)s']) | ||
| DEBUG_12: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s']) | ||
| DEBUG_16: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s']) | ||
| DEBUG_20: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s']) | ||
| DEBUG_24: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s']) | ||
| DEBUG_28: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s']) | ||
| DEBUG_32: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s']) | ||
| DEBUG_36: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s']) | ||
| DEBUG_40: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s']) | ||
| DEBUG_48: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s']) | ||
class LoggingFormat(Enum):
    """Predefined logging format strings, joined by ' ┇ ' separators.

    Naming: PREFIX_NN, where NN is the fixed field width used for the
    logger name (or filename/pathname for TRACE/DEBUG variants).
    """
    PRINT_00 = '%(message)s'
    PRINT_12 = '%(name)12s ┇ %(message)s'
    PRINT_16 = '%(name)16s ┇ %(message)s'
    PRINT_20 = '%(name)20s ┇ %(message)s'
    BRIEF_00 = '%(asctime)s ┇ %(message)s'
    BRIEF_12 = '%(asctime)s ┇ %(name)12s ┇ %(message)s'
    BRIEF_16 = '%(asctime)s ┇ %(name)16s ┇ %(message)s'
    BRIEF_20 = '%(asctime)s ┇ %(name)20s ┇ %(message)s'
    CHECK_00 = '%(asctime)s ┇ %(levelname)-7s ┇ %(message)s'
    CHECK_12 = '%(asctime)s ┇ %(levelname)-7s ┇ %(name)12s ┇ %(message)s'
    CHECK_16 = '%(asctime)s ┇ %(levelname)-7s ┇ %(name)16s ┇ %(message)s'
    CHECK_20 = '%(asctime)s ┇ %(levelname)-7s ┇ %(name)20s ┇ %(message)s'
    CHECK_24 = '%(asctime)s ┇ %(levelname)-7s ┇ %(name)24s ┇ %(message)s'
    CHECK_28 = '%(asctime)s ┇ %(levelname)-7s ┇ %(name)28s ┇ %(message)s'
    CHECK_32 = '%(asctime)s ┇ %(levelname)-7s ┇ %(name)32s ┇ %(message)s'
    CHECK_36 = '%(asctime)s ┇ %(levelname)-7s ┇ %(name)36s ┇ %(message)s'
    CHECK_40 = '%(asctime)s ┇ %(levelname)-7s ┇ %(name)40s ┇ %(message)s'
    CHECK_48 = '%(asctime)s ┇ %(levelname)-7s ┇ %(name)48s ┇ %(message)s'
    TRACE_12 = '%(asctime)s ┇ %(filename)12s:%(lineno)-4d ┇ %(message)s'
    TRACE_16 = '%(asctime)s ┇ %(filename)16s:%(lineno)-4d ┇ %(message)s'
    TRACE_20 = '%(asctime)s ┇ %(filename)20s:%(lineno)-4d ┇ %(message)s'
    TRACE_24 = '%(asctime)s ┇ %(filename)24s:%(lineno)-4d ┇ %(message)s'
    TRACE_28 = '%(asctime)s ┇ %(filename)28s:%(lineno)-4d ┇ %(message)s'
    TRACE_32 = '%(asctime)s ┇ %(filename)32s:%(lineno)-4d ┇ %(message)s'
    TRACE_36 = '%(asctime)s ┇ %(filename)36s:%(lineno)-4d ┇ %(message)s'
    TRACE_40 = '%(asctime)s ┇ %(filename)40s:%(lineno)-4d ┇ %(message)s'
    DEBUG_00 = '%(pathname)60s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(message)s'
    DEBUG_12 = '%(pathname)60s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(name)12s ┇ %(message)s'
    DEBUG_16 = '%(pathname)60s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(name)16s ┇ %(message)s'
    DEBUG_20 = '%(pathname)70s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(name)20s ┇ %(message)s'
    DEBUG_24 = '%(pathname)70s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(name)24s ┇ %(message)s'
    DEBUG_28 = '%(pathname)70s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(name)28s ┇ %(message)s'
    DEBUG_32 = '%(pathname)90s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(name)32s ┇ %(message)s'
    DEBUG_36 = '%(pathname)90s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(name)36s ┇ %(message)s'
    DEBUG_40 = '%(pathname)120s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(name)40s ┇ %(message)s'
    DEBUG_48 = '%(pathname)120s:%(lineno)-5d ┇ %(asctime)s ┇ %(levelname)-7s ┇ %(name)48s ┇ %(message)s'
| class LoggerWriter: | ||
@@ -385,2 +389,18 @@ def __init__(self, logger: logging.Logger, level: int = logging.INFO): | ||
def count_dirs(path, key, sub=None):
    """Count directories directly under *path* whose name contains *key*;
    with *sub*, count their child directories whose name contains *sub*."""
    base = Path(path)
    pattern = f"*{key}*" if not sub else f"*{key}*/*{sub}*"
    return sum(1 for entry in base.glob(pattern) if entry.is_dir())
def count_files(path, key, sub=None):
    """Count files directly under *path* whose name contains *key*;
    with *sub*, count files containing *sub* anywhere below *key* dirs."""
    base = Path(path)
    # NOTE: the *sub* form recurses (**), unlike count_dirs which goes one level
    pattern = f"*{key}*" if not sub else f"*{key}*/**/*{sub}*"
    return sum(1 for entry in base.glob(pattern) if entry.is_file())
| def paths_info(*xs, to_pathlist=paths, to_filename=str, sort_key=None): | ||
@@ -473,4 +493,4 @@ from chrisbase.util import to_dataframe | ||
| path = Path(path) | ||
| new_name = (f"{pre}{sep}" if pre is not None else "") + path.stem + (f"{sep}{post}" if post is not None else "") | ||
| return path.parent / (new_name + NO.join(path.suffixes)) | ||
| new_stem = (f"{pre}{sep}" if pre is not None else "") + path.stem + (f"{sep}{post}" if post is not None else "") | ||
| return path.parent / (new_stem + path.suffix) | ||
@@ -582,2 +602,27 @@ | ||
| def _path_to_str(obj): | ||
| if isinstance(obj, Path): | ||
| return str(obj) | ||
| if isinstance(obj, dict): | ||
| return {k: _path_to_str(v) for k, v in obj.items()} | ||
| if isinstance(obj, (list, tuple)): | ||
| return [_path_to_str(v) for v in obj] | ||
| return obj | ||
def to_yaml(conf, *, resolve=False, sort_keys=False, **kwds):
    """Render *conf* (an OmegaConf config or any plain container) as a YAML
    string, converting Path values to plain strings first."""
    cfg = conf if OmegaConf.is_config(conf) else OmegaConf.create(conf)
    plain = _path_to_str(OmegaConf.to_container(cfg, resolve=resolve, enum_to_str=True))
    return yaml.dump(
        plain,
        Dumper=get_omega_conf_dumper(),
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=sort_keys,
        **kwds,
    )
def save_yaml(conf, path, *, resolve=False, sort_keys=False):
    """Serialize *conf* to YAML (wide lines, no wrapping) and write it to
    *path*; returns the Path that was written."""
    target = Path(path)
    target.write_text(to_yaml(conf, resolve=resolve, sort_keys=sort_keys, width=4096))
    return target
| def merge_dicts(*xs) -> dict: | ||
@@ -584,0 +629,0 @@ items = list() |
@@ -0,1 +1,2 @@ | ||
| import accelerate.utils | ||
| import time | ||
@@ -47,1 +48,35 @@ from datetime import datetime, timedelta, timezone | ||
| return f"{hh:02.0f}:{mm:02.0f}:{ss:06.3f}" | ||
def gather_start_time() -> float:
    """Return the earliest local start timestamp across all distributed
    processes (gathered via accelerate)."""
    return min(accelerate.utils.gather_object([now_stamp()]))
def wait_for_everyone():
    # Thin pass-through to accelerate's distributed barrier.
    return accelerate.utils.wait_for_everyone()
@contextmanager
def run_on_local_main_process(local_rank: int = int(os.getenv("LOCAL_RANK", -1))):
    """Synchronize all processes before and after the with-block.

    NOTE(review): the body runs on every rank (`yield` and `yield None`
    are identical), so *local_rank* only documents intent here; the
    default is read from LOCAL_RANK once, at import time.
    """
    wait_for_everyone()
    try:
        yield
    finally:
        wait_for_everyone()
@contextmanager
def flush_and_sleep(delay: float = 0.1):
    """Run the with-block, then flush stderr/stdout (best effort) and pause
    for *delay* seconds; exceptions from the body still propagate."""
    try:
        yield
    finally:
        for stream in (sys.stderr, sys.stdout):
            try:
                stream.flush()
            except Exception:
                break  # mirror original: first flush failure skips the rest
        time.sleep(delay)
@@ -8,4 +8,7 @@ from __future__ import annotations | ||
| import re | ||
| import sys | ||
| import time | ||
| from concurrent.futures import Future | ||
| from concurrent.futures import ProcessPoolExecutor | ||
| from contextlib import contextmanager | ||
| from dataclasses import asdict | ||
@@ -214,3 +217,3 @@ from itertools import groupby | ||
| class EmptyTqdm: | ||
| class EmptyTqdm: # TODO: Remove someday | ||
| """Dummy tqdm which doesn't do anything.""" | ||
@@ -237,3 +240,3 @@ | ||
| class empty_tqdm_cls: | ||
| class empty_tqdm_cls: # TODO: Remove someday | ||
| def __init__(self, *args, **kwargs): | ||
@@ -252,3 +255,3 @@ pass | ||
| class mute_tqdm_cls: | ||
| class mute_tqdm_cls: # TODO: Remove someday | ||
| def to_desc(self, desc, pre=None): | ||
@@ -283,3 +286,3 @@ return NO.join([ | ||
| def terminate_processes(pool: ProcessPoolExecutor): | ||
| def terminate_processes(pool: ProcessPoolExecutor): # TODO: Remove someday | ||
| for proc in pool._processes.values(): | ||
@@ -286,0 +289,0 @@ if proc.is_alive(): |
| import contextlib | ||
| import json | ||
| from pathlib import Path | ||
| from sys import argv, stderr | ||
| from time import sleep | ||
| from urllib.request import urlopen | ||
class MorpClient:
    """HTTP client for a remote morphological analyzer (lang_api interface).

    Sends MORPH analysis requests to ``http://<netloc>/interface/lm_interface``
    and extracts the JSON payload from the response.
    """

    def __init__(self, netloc: str):
        # netloc: "host:port" of the analyzer service
        self.netloc = netloc
        self.api_url = f"http://{self.netloc}/interface/lm_interface"

    def _request_json(self, api_param: dict):
        # Single POST to the analyzer; retries are handled by the caller.
        with contextlib.closing(urlopen(self.api_url, json.dumps(api_param).encode())) as res:
            return json.loads(res.read().decode())['return_object']['json']

    def do_mlt(self, text: str):
        """Run MORPH analysis on *text*.

        Retries once after a 10-second pause (previously this used bare
        ``except:`` and duplicated the request code); on the second failure
        it prints an error banner and exits with status 1, as before.
        """
        api_param = {"argument": {"analyzer_types": ["MORPH"], "text": text}}
        for attempt in range(2):
            try:
                return self._request_json(api_param)
            except Exception:
                if attempt == 0:
                    sleep(10.0)  # transient failure: wait, then retry once
        print("\n" + "=" * 120)
        print(f'[error] Can not connect to lang_api[{self.api_url}]')
        print("=" * 120 + "\n")
        exit(1)

    def token_only(self, text: str):
        """Return the analyzed text as space-joined lemmas."""
        ndoc = self.do_mlt(text)
        return ' '.join(f"{m['lemma']}" for s in ndoc['sentence'] for m in s['morp'])

    def token_tag(self, text: str):
        """Return the analyzed text as space-joined ``lemma/TAG`` pairs."""
        ndoc = self.do_mlt(text)
        return ' '.join(f"{m['lemma']}/{m['type']}" for s in ndoc['sentence'] for m in s['morp'])
if __name__ == "__main__":
    # CLI: run each line of a text file through the remote analyzer and
    # print the raw text, its lemmas, and its lemma/tag pairs.
    if len(argv) < 3:
        print("[Usage] python3 morp.py infile netloc")
        print(" - infile: input text file path")
        print(" - netloc: network location [host:port] (e.g. localhost:7100, 127.0.0.1:7200)")
        exit(1)
    infile = Path(argv[1])
    if not infile.exists():
        print("No infile: " + str(infile), file=stderr)
        exit(1)
    client = MorpClient(netloc=argv[2])
    # utf-8-sig strips a leading BOM if present
    with infile.open(encoding='utf-8-sig') as inp:
        for line in inp:
            text = line.rstrip()
            print(f'base="{text}"')
            print(f'toks="{client.token_only(text=text)}"')
            print(f'morp="{client.token_tag(text=text)}"')
            print()
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
87450
3.21%1883
4.15%19
-5%