Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a DemoInstallSign in
Socket

chrisbase

Package Overview
Dependencies
Maintainers
1
Versions
45
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

chrisbase - npm Package Compare versions

Comparing version
0.5.9
to
0.6.0
+15
-11
PKG-INFO
Metadata-Version: 2.4
Name: chrisbase
Version: 0.5.9
Version: 0.6.0
Summary: Base library for python coding

@@ -20,21 +20,25 @@ Home-page: https://github.com/chrisjihee/chrisbase

Requires-Dist: typer
Requires-Dist: hydra-core
Requires-Dist: pydantic
Requires-Dist: omegaconf
Requires-Dist: dataclasses
Requires-Dist: dataclasses-json
Requires-Dist: httpx
Requires-Dist: ipaddress
Requires-Dist: netifaces
Requires-Dist: numpy
Requires-Dist: scipy
Requires-Dist: httpx
Requires-Dist: pandas
Requires-Dist: pymongo
Requires-Dist: pydantic
Requires-Dist: openpyxl
Requires-Dist: matplotlib
Requires-Dist: scikit-learn
Requires-Dist: tabulate
Requires-Dist: ipynbname
Requires-Dist: ipaddress
Requires-Dist: netifaces
Requires-Dist: matplotlib
Requires-Dist: filelock
Requires-Dist: openpyxl
Requires-Dist: pymongo
Requires-Dist: sqlalchemy
Requires-Dist: dataclasses
Requires-Dist: dataclasses-json
Requires-Dist: elasticsearch
Requires-Dist: more-itertools
Requires-Dist: scikit-learn
Requires-Dist: seqeval
Requires-Dist: accelerate
Dynamic: license-file

@@ -41,0 +45,0 @@

+15
-11
[metadata]
name = chrisbase
version = 0.5.9
version = 0.6.0
author = Jihee Ryu

@@ -29,21 +29,25 @@ author_email = chrisjihee@naver.com

typer
hydra-core
pydantic
omegaconf
dataclasses
dataclasses-json
httpx
ipaddress
netifaces
numpy
scipy
httpx
pandas
pymongo
pydantic
openpyxl
matplotlib
scikit-learn
tabulate
ipynbname
ipaddress
netifaces
matplotlib
filelock
openpyxl
pymongo
sqlalchemy
dataclasses
dataclasses-json
elasticsearch
more-itertools
scikit-learn
seqeval
accelerate

@@ -50,0 +54,0 @@ [options.packages.find]

Metadata-Version: 2.4
Name: chrisbase
Version: 0.5.9
Version: 0.6.0
Summary: Base library for python coding

@@ -20,21 +20,25 @@ Home-page: https://github.com/chrisjihee/chrisbase

Requires-Dist: typer
Requires-Dist: hydra-core
Requires-Dist: pydantic
Requires-Dist: omegaconf
Requires-Dist: dataclasses
Requires-Dist: dataclasses-json
Requires-Dist: httpx
Requires-Dist: ipaddress
Requires-Dist: netifaces
Requires-Dist: numpy
Requires-Dist: scipy
Requires-Dist: httpx
Requires-Dist: pandas
Requires-Dist: pymongo
Requires-Dist: pydantic
Requires-Dist: openpyxl
Requires-Dist: matplotlib
Requires-Dist: scikit-learn
Requires-Dist: tabulate
Requires-Dist: ipynbname
Requires-Dist: ipaddress
Requires-Dist: netifaces
Requires-Dist: matplotlib
Requires-Dist: filelock
Requires-Dist: openpyxl
Requires-Dist: pymongo
Requires-Dist: sqlalchemy
Requires-Dist: dataclasses
Requires-Dist: dataclasses-json
Requires-Dist: elasticsearch
Requires-Dist: more-itertools
Requires-Dist: scikit-learn
Requires-Dist: seqeval
Requires-Dist: accelerate
Dynamic: license-file

@@ -41,0 +45,0 @@

tqdm
typer
hydra-core
pydantic
omegaconf
dataclasses
dataclasses-json
httpx
ipaddress
netifaces
numpy
scipy
httpx
pandas
pymongo
pydantic
openpyxl
matplotlib
scikit-learn
tabulate
ipynbname
ipaddress
netifaces
matplotlib
filelock
openpyxl
pymongo
sqlalchemy
dataclasses
dataclasses-json
elasticsearch
more-itertools
scikit-learn
seqeval
accelerate

@@ -9,3 +9,2 @@ LICENSE

src/chrisbase/io.py
src/chrisbase/morp.py
src/chrisbase/net.py

@@ -12,0 +11,0 @@ src/chrisbase/time.py

@@ -0,1 +1,3 @@

import subprocess
from typer import Typer

@@ -6,2 +8,11 @@

def run_command(cmd):
    """Split *cmd* on whitespace, echo it inside a banner, and run it.

    The command is executed with ``subprocess.run`` (no shell); three
    blank lines are printed afterwards to separate the output visually.
    """
    tokens = cmd.strip().split()
    banner = "*" * 120
    print(banner)
    print("[COMMAND]", " ".join(tokens))
    print(banner)
    subprocess.run(tokens)
    print("\n" * 3)
@app.command()

@@ -8,0 +19,0 @@ def hello():

@@ -7,2 +7,3 @@ import itertools

import warnings
from contextlib import contextmanager
from dataclasses import dataclass

@@ -18,2 +19,3 @@ from dataclasses import field

import datasets
import pandas as pd

@@ -24,14 +26,14 @@ import pymongo.collection

import typer
from chrisbase.io import get_hostname, get_hostaddr, current_file, first_or, cwd, hr, flush_or, make_parent_dir, setup_unit_logger, setup_dual_logger, open_file, file_lines, new_path, get_http_clients, log_table, LoggingFormat, to_yaml
from chrisbase.time import now, str_delta
from chrisbase.util import tupled, SP, NO, to_dataframe
from dataclasses_json import DataClassJsonMixin
from elasticsearch import Elasticsearch
from more_itertools import ichunked
from pydantic import BaseModel, Field, model_validator, ConfigDict
from omegaconf import DictConfig, OmegaConf
from pydantic import BaseModel, Field, ConfigDict, model_validator, field_validator
from pymongo import MongoClient
from transformers import set_seed
from typing_extensions import Self
from chrisbase.io import get_hostname, get_hostaddr, current_file, first_or, cwd, hr, flush_or, make_parent_dir, setup_unit_logger, setup_dual_logger, open_file, file_lines, new_path, get_http_clients, log_table, LoggingFormat
from chrisbase.time import now, str_delta
from chrisbase.util import tupled, SP, NO, to_dataframe
from transformers import set_seed
logger = logging.getLogger(__name__)

@@ -50,8 +52,44 @@

def run(*functions: Callable[..., Any], **kwargs) -> None:
app = AppTyper(**kwargs)
app = AppTyper()
for function in functions:
app.command()(function)
app.command(**kwargs)(function)
app()
@contextmanager
def temporary_mutable_conf(*cfgs):
    """Make the given OmegaConf objects writable for the duration of the block.

    Each config's original readonly flag is recorded on entry and restored
    on exit, even if the block raises.
    """
    saved_flags = [OmegaConf.is_readonly(cfg) for cfg in cfgs]
    for cfg in cfgs:
        OmegaConf.set_readonly(cfg, False)
    try:
        yield  # a single yield, as required by @contextmanager
    finally:
        for cfg, was_readonly in zip(cfgs, saved_flags):
            OmegaConf.set_readonly(cfg, was_readonly)
@contextmanager
def disable_datasets_progress():
    """Silence the HuggingFace ``datasets`` progress bar inside the block.

    The bar is switched off on entry and unconditionally re-enabled on
    exit, even when the block raises.
    """
    datasets.disable_progress_bar()
    try:
        yield
    finally:
        datasets.enable_progress_bar()
class NewProjectEnv(BaseModel):

@@ -75,3 +113,3 @@ hostname: str = get_hostname()

logging_level: int = Field(default=logging.INFO)
logging_format: str = Field(default=LoggingFormat.BRIEF_00)
logging_format: LoggingFormat = Field(default=LoggingFormat.BRIEF_00)
datetime_format: str = Field(default="[%m.%d %H:%M:%S]")

@@ -84,2 +122,17 @@ argument_file: str | Path = Field(default=None)

@field_validator('logging_format', mode='before')
def validate_logging_format(cls, v):
    """Coerce a string into a LoggingFormat member (by name first, then by value)."""
    if not isinstance(v, str):
        # Non-strings (e.g. an actual LoggingFormat) pass through untouched.
        return v
    try:
        # Fast path: the string is an enum member *name*.
        return LoggingFormat[v]
    except KeyError:
        # Slow path: match against the member *values* instead.
        for member in LoggingFormat:
            if member.value == v:
                return member
        raise ValueError(f"Invalid logging_format: {v}.")
@model_validator(mode='after')

@@ -103,3 +156,6 @@ def after(self) -> Self:

setup_dual_logger(
level=logging_level, fmt=self.logging_format, datefmt=self.datetime_format, stream=sys.stdout,
level=logging_level,
fmt=self.logging_format.value,
datefmt=self.datetime_format,
stream=sys.stdout,
filename=self.output_dir / self.logging_file,

@@ -109,3 +165,6 @@ )

setup_unit_logger(
level=logging_level, fmt=self.logging_format, datefmt=self.datetime_format, stream=sys.stdout,
level=logging_level,
fmt=self.logging_format.value,
datefmt=self.datetime_format,
stream=sys.stdout,
)

@@ -793,17 +852,4 @@ return self

class RuntimeChecking:
    """Context manager that brackets a run with start/settle timestamps.

    On entry it marks the start time on ``args.time`` and persists the
    arguments via ``args.save_args()``; on exit (normal or exceptional)
    it marks the settle time and persists again.
    """

    def __init__(self, args: CommonArguments):
        self.args: CommonArguments = args

    def __enter__(self):
        self.args.time.set_started()
        self.args.save_args()
        # Fix: return self so ``with RuntimeChecking(a) as rc:`` binds the
        # manager instead of None (previous code returned nothing).
        return self

    def __exit__(self, *exc_info):
        self.args.time.set_settled()
        self.args.save_args()
class JobTimer:
def __init__(self, name=None, args: CommonArguments | NewCommonArguments = None, prefix=None, postfix=None,
def __init__(self, name=None, args: CommonArguments | NewCommonArguments | DictConfig = None, prefix=None, postfix=None,
verbose=1, mt=0, mb=0, pt=0, pb=0, rt=0, rb=0, rc='-', rw=137,

@@ -861,7 +907,15 @@ flush_sec=0.1, mute_loggers=None, mute_warning=None):

if self.args:
self.args.time.set_started()
if hasattr(self.args, "time"):
self.args.time.set_started()
if self.verbose >= 1:
self.args.info_args(c='-', w=self.rw)
if hasattr(self.args, "info_args"):
self.args.info_args(c="-", w=self.rw)
else:
yaml_str = to_yaml(self.args, resolve=True, width=4096).rstrip()
logger.info("[args]")
sum(logger.info(f" {l}") or 1 for l in yaml_str.splitlines())
logger.info(hr(c=self.rc, w=self.rw))
if self.verbose >= 2:
self.args.save_args()
if hasattr(self.args, "save_args"):
self.args.save_args()
self.t1 = datetime.now()

@@ -877,3 +931,4 @@ return self

if self.args:
self.args.time.set_settled()
if hasattr(self.args, "time"):
self.args.time.set_settled()
if self.verbose >= 2:

@@ -880,0 +935,0 @@ self.args.save_args()

@@ -24,8 +24,10 @@ import bz2

import pandas as pd
import yaml
from chrisbase.time import from_timestamp
from chrisbase.util import tupled, OX
from omegaconf import OmegaConf
from omegaconf._utils import get_omega_conf_dumper
from tabulate import tabulate
from tensorboard.backend.event_processing import event_accumulator
from chrisbase.time import from_timestamp
from chrisbase.util import tupled, NO, OX
logger = logging.getLogger(__name__)

@@ -35,42 +37,44 @@ sys_stdout = sys.stdout

from enum import Enum
class LoggingFormat:
    """Catalog of ``logging`` format strings, fields joined by ' ┇ '.

    Families: PRINT (message only), BRIEF (+ timestamp), CHECK (+ level),
    TRACE (filename:lineno), DEBUG (full pathname:lineno + level).  The
    numeric suffix is the column width reserved for the logger/file name.
    """
    PRINT_00: str = ' ┇ '.join(['%(message)s'])
    PRINT_12: str = ' ┇ '.join(['%(name)12s', '%(message)s'])
    PRINT_16: str = ' ┇ '.join(['%(name)16s', '%(message)s'])
    PRINT_20: str = ' ┇ '.join(['%(name)20s', '%(message)s'])
    BRIEF_00: str = ' ┇ '.join(['%(asctime)s', '%(message)s'])
    BRIEF_12: str = ' ┇ '.join(['%(asctime)s', '%(name)12s', '%(message)s'])
    BRIEF_16: str = ' ┇ '.join(['%(asctime)s', '%(name)16s', '%(message)s'])
    BRIEF_20: str = ' ┇ '.join(['%(asctime)s', '%(name)20s', '%(message)s'])
    CHECK_00: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(message)s'])
    CHECK_12: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s'])
    CHECK_16: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s'])
    CHECK_20: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s'])
    CHECK_24: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s'])
    CHECK_28: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s'])
    CHECK_32: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s'])
    CHECK_36: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s'])
    CHECK_40: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s'])
    CHECK_48: str = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s'])
    TRACE_12: str = ' ┇ '.join(['%(asctime)s', '%(filename)12s:%(lineno)-4d', '%(message)s'])
    TRACE_16: str = ' ┇ '.join(['%(asctime)s', '%(filename)16s:%(lineno)-4d', '%(message)s'])
    TRACE_20: str = ' ┇ '.join(['%(asctime)s', '%(filename)20s:%(lineno)-4d', '%(message)s'])
    TRACE_24: str = ' ┇ '.join(['%(asctime)s', '%(filename)24s:%(lineno)-4d', '%(message)s'])
    TRACE_28: str = ' ┇ '.join(['%(asctime)s', '%(filename)28s:%(lineno)-4d', '%(message)s'])
    TRACE_32: str = ' ┇ '.join(['%(asctime)s', '%(filename)32s:%(lineno)-4d', '%(message)s'])
    TRACE_36: str = ' ┇ '.join(['%(asctime)s', '%(filename)36s:%(lineno)-4d', '%(message)s'])
    TRACE_40: str = ' ┇ '.join(['%(asctime)s', '%(filename)40s:%(lineno)-4d', '%(message)s'])
    DEBUG_00: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(message)s'])
    DEBUG_12: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s'])
    DEBUG_16: str = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s'])
    DEBUG_20: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s'])
    DEBUG_24: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s'])
    DEBUG_28: str = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s'])
    DEBUG_32: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s'])
    DEBUG_36: str = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s'])
    DEBUG_40: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s'])
    DEBUG_48: str = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s'])
class LoggingFormat(Enum):
    """Enum of ``logging`` format strings, fields joined by ' ┇ '.

    Enum-typed successor of the plain-attribute class: members compare by
    identity and the raw format string lives in ``.value``.  Families:
    PRINT (message only), BRIEF (+ timestamp), CHECK (+ level), TRACE
    (filename:lineno), DEBUG (full pathname:lineno + level); the numeric
    suffix is the column width reserved for the logger/file name.
    """
    PRINT_00 = ' ┇ '.join(['%(message)s'])
    PRINT_12 = ' ┇ '.join(['%(name)12s', '%(message)s'])
    PRINT_16 = ' ┇ '.join(['%(name)16s', '%(message)s'])
    PRINT_20 = ' ┇ '.join(['%(name)20s', '%(message)s'])
    BRIEF_00 = ' ┇ '.join(['%(asctime)s', '%(message)s'])
    BRIEF_12 = ' ┇ '.join(['%(asctime)s', '%(name)12s', '%(message)s'])
    BRIEF_16 = ' ┇ '.join(['%(asctime)s', '%(name)16s', '%(message)s'])
    BRIEF_20 = ' ┇ '.join(['%(asctime)s', '%(name)20s', '%(message)s'])
    CHECK_00 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(message)s'])
    CHECK_12 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s'])
    CHECK_16 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s'])
    CHECK_20 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s'])
    CHECK_24 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s'])
    CHECK_28 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s'])
    CHECK_32 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s'])
    CHECK_36 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s'])
    CHECK_40 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s'])
    CHECK_48 = ' ┇ '.join(['%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s'])
    TRACE_12 = ' ┇ '.join(['%(asctime)s', '%(filename)12s:%(lineno)-4d', '%(message)s'])
    TRACE_16 = ' ┇ '.join(['%(asctime)s', '%(filename)16s:%(lineno)-4d', '%(message)s'])
    TRACE_20 = ' ┇ '.join(['%(asctime)s', '%(filename)20s:%(lineno)-4d', '%(message)s'])
    TRACE_24 = ' ┇ '.join(['%(asctime)s', '%(filename)24s:%(lineno)-4d', '%(message)s'])
    TRACE_28 = ' ┇ '.join(['%(asctime)s', '%(filename)28s:%(lineno)-4d', '%(message)s'])
    TRACE_32 = ' ┇ '.join(['%(asctime)s', '%(filename)32s:%(lineno)-4d', '%(message)s'])
    TRACE_36 = ' ┇ '.join(['%(asctime)s', '%(filename)36s:%(lineno)-4d', '%(message)s'])
    TRACE_40 = ' ┇ '.join(['%(asctime)s', '%(filename)40s:%(lineno)-4d', '%(message)s'])
    DEBUG_00 = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(message)s'])
    DEBUG_12 = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)12s', '%(message)s'])
    DEBUG_16 = ' ┇ '.join(['%(pathname)60s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)16s', '%(message)s'])
    DEBUG_20 = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)20s', '%(message)s'])
    DEBUG_24 = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)24s', '%(message)s'])
    DEBUG_28 = ' ┇ '.join(['%(pathname)70s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)28s', '%(message)s'])
    DEBUG_32 = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)32s', '%(message)s'])
    DEBUG_36 = ' ┇ '.join(['%(pathname)90s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)36s', '%(message)s'])
    DEBUG_40 = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)40s', '%(message)s'])
    DEBUG_48 = ' ┇ '.join(['%(pathname)120s:%(lineno)-5d', '%(asctime)s', '%(levelname)-7s', '%(name)48s', '%(message)s'])
class LoggerWriter:

@@ -385,2 +389,18 @@ def __init__(self, logger: logging.Logger, level: int = logging.INFO):

def count_dirs(path, key, sub=None):
    """Count directories under *path* whose name contains *key*.

    When *sub* is given, instead count directories one level below a
    ``*key*`` match whose own name contains *sub*.
    """
    base = Path(path)
    pattern = f"*{key}*" if not sub else f"*{key}*/*{sub}*"
    return sum(1 for entry in base.glob(pattern) if entry.is_dir())
def count_files(path, key, sub=None):
    """Count files under *path* whose name contains *key*.

    When *sub* is given, instead count files anywhere (recursively) below
    a ``*key*`` directory whose own name contains *sub*.
    """
    base = Path(path)
    pattern = f"*{key}*" if not sub else f"*{key}*/**/*{sub}*"
    return sum(1 for entry in base.glob(pattern) if entry.is_file())
def paths_info(*xs, to_pathlist=paths, to_filename=str, sort_key=None):

@@ -473,4 +493,4 @@ from chrisbase.util import to_dataframe

path = Path(path)
new_name = (f"{pre}{sep}" if pre is not None else "") + path.stem + (f"{sep}{post}" if post is not None else "")
return path.parent / (new_name + NO.join(path.suffixes))
new_stem = (f"{pre}{sep}" if pre is not None else "") + path.stem + (f"{sep}{post}" if post is not None else "")
return path.parent / (new_stem + path.suffix)

@@ -582,2 +602,27 @@

def _path_to_str(obj):
if isinstance(obj, Path):
return str(obj)
if isinstance(obj, dict):
return {k: _path_to_str(v) for k, v in obj.items()}
if isinstance(obj, (list, tuple)):
return [_path_to_str(v) for v in obj]
return obj
def to_yaml(conf, *, resolve=False, sort_keys=False, **kwds):
    """Render *conf* (an OmegaConf config or a plain container) as YAML text.

    Paths are stringified and enums serialized by value; extra *kwds* are
    forwarded to ``yaml.dump``.
    """
    cfg = conf if OmegaConf.is_config(conf) else OmegaConf.create(conf)
    plain = _path_to_str(OmegaConf.to_container(cfg, resolve=resolve, enum_to_str=True))
    return yaml.dump(
        plain,
        Dumper=get_omega_conf_dumper(),
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=sort_keys,
        **kwds,
    )
def save_yaml(conf, path, *, resolve=False, sort_keys=False):
    """Serialize *conf* to YAML and write it to *path*; return the written Path."""
    target = Path(path)
    target.write_text(to_yaml(conf, resolve=resolve, sort_keys=sort_keys, width=4096))
    return target
def merge_dicts(*xs) -> dict:

@@ -584,0 +629,0 @@ items = list()

@@ -0,1 +1,2 @@

import accelerate.utils
import time

@@ -47,1 +48,35 @@ from datetime import datetime, timedelta, timezone

return f"{hh:02.0f}:{mm:02.0f}:{ss:06.3f}"
def gather_start_time() -> float:
    """Return the earliest ``now_stamp()`` across all distributed processes.

    Each rank contributes its local timestamp; the minimum of the gathered
    values is the shared start time.
    """
    local_start = now_stamp()
    return min(accelerate.utils.gather_object([local_start]))
def wait_for_everyone():
    """Distributed barrier: block until every process reaches this point.

    Thin pass-through to ``accelerate.utils.wait_for_everyone()``.
    """
    return accelerate.utils.wait_for_everyone()
@contextmanager
def run_on_local_main_process(local_rank: int = int(os.getenv("LOCAL_RANK", -1))):
    """Barrier-bracketed section intended for the local main process.

    The body runs on every rank (the rank check only selects which yield
    statement fires; both yield None).  A barrier is taken before entering
    and again after leaving, even on error.

    NOTE(review): the default local_rank is read from LOCAL_RANK once at
    import time, not per call — confirm this is intended.
    """
    wait_for_everyone()
    try:
        if local_rank != 0:
            yield None
        else:
            yield
    finally:
        wait_for_everyone()
@contextmanager
def flush_and_sleep(delay: float = 0.1):
    """Yield to the caller, then flush both std streams and sleep *delay* seconds.

    Flushing is best-effort — any failure is swallowed — but the sleep
    always runs, even when the block raises.
    """
    try:
        yield
    finally:
        try:
            sys.stderr.flush()
            sys.stdout.flush()
        except Exception:
            # Streams may be closed or replaced (e.g. under a test runner).
            pass
        time.sleep(delay)

@@ -8,4 +8,7 @@ from __future__ import annotations

import re
import sys
import time
from concurrent.futures import Future
from concurrent.futures import ProcessPoolExecutor
from contextlib import contextmanager
from dataclasses import asdict

@@ -214,3 +217,3 @@ from itertools import groupby

class EmptyTqdm:
class EmptyTqdm: # TODO: Remove someday
"""Dummy tqdm which doesn't do anything."""

@@ -237,3 +240,3 @@

class empty_tqdm_cls:
class empty_tqdm_cls: # TODO: Remove someday
def __init__(self, *args, **kwargs):

@@ -252,3 +255,3 @@ pass

class mute_tqdm_cls:
class mute_tqdm_cls: # TODO: Remove someday
def to_desc(self, desc, pre=None):

@@ -283,3 +286,3 @@ return NO.join([

def terminate_processes(pool: ProcessPoolExecutor):
def terminate_processes(pool: ProcessPoolExecutor): # TODO: Remove someday
for proc in pool._processes.values():

@@ -286,0 +289,0 @@ if proc.is_alive():

import contextlib
import json
from pathlib import Path
from sys import argv, stderr
from time import sleep
from urllib.request import urlopen
class MorpClient:
    """HTTP client for a morphological-analysis service at ``/interface/lm_interface``.

    ``do_mlt`` POSTs the text for MORPH analysis; on any failure it retries
    once after a 10 s back-off, and on a second failure prints an error
    banner and terminates the process with exit code 1.
    NOTE(review): hard ``exit(1)`` inside library code — confirm callers
    accept process termination on connection failure.
    """
    def __init__(self, netloc: str):
        # netloc: "host:port" of the analysis server.
        self.netloc = netloc
        self.api_url = f"http://{self.netloc}/interface/lm_interface"
    def do_mlt(self, text: str):
        # Request MORPH-type analysis for *text* and return the parsed
        # 'return_object' -> 'json' payload from the service response.
        api_param = {"argument": {"analyzer_types": ["MORPH"], "text": text}}
        try:
            with contextlib.closing(urlopen(self.api_url, json.dumps(api_param).encode())) as res:
                return json.loads(res.read().decode())['return_object']['json']
        except:  # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit
            try:
                sleep(10.0)  # single retry after a fixed back-off
                with contextlib.closing(urlopen(self.api_url, json.dumps(api_param).encode())) as res:
                    return json.loads(res.read().decode())['return_object']['json']
            except:  # NOTE(review): bare except (see above)
                print("\n" + "=" * 120)
                print(f'[error] Can not connect to lang_api[{self.api_url}]')
                print("=" * 120 + "\n")
                exit(1)
    def token_only(self, text: str):
        # Space-joined lemmas of every morpheme in every sentence.
        ndoc = self.do_mlt(text)
        mtoks = ' '.join([f"{m['lemma']}" for s in ndoc['sentence'] for m in s['morp']])
        return mtoks
    def token_tag(self, text: str):
        # Space-joined "lemma/TYPE" pairs for every morpheme.
        ndoc = self.do_mlt(text)
        mtags = ' '.join([f"{m['lemma']}/{m['type']}" for s in ndoc['sentence'] for m in s['morp']])
        return mtags
if __name__ == "__main__":
    # CLI entry point: analyze every line of a text file via a MorpClient
    # server, printing the raw text plus token-only and token/tag renderings.
    if len(argv) < 3:
        print("[Usage] python3 morp.py infile netloc")
        print(" - infile: input text file path")
        print(" - netloc: network location [host:port] (e.g. localhost:7100, 127.0.0.1:7200)")
        exit(1)
    infile = Path(argv[1])
    if not infile.exists():
        print("No infile: " + str(infile), file=stderr)
        exit(1)
    client = MorpClient(netloc=argv[2])
    # utf-8-sig tolerates (and strips) a BOM at the start of the input file.
    with infile.open(encoding='utf-8-sig') as inp:
        for line in inp.readlines():
            text = line.rstrip()
            print(f'base="{text}"')
            print(f'toks="{client.token_only(text=text)}"')
            print(f'morp="{client.token_tag(text=text)}"')
            print()