# -----------  read file ---------------------------------
from pathlib import Path
from mlcf.datatools.data_reader import (
    read_ohlcv_json_from_file,
    read_ohlcv_json_from_dir,
    read_json_file
)

# Read OHLCV data from a single OHLCV JSON file.
data = read_ohlcv_json_from_file(Path("tests/testdata/ETH_BUSD-15m.json"))

# Read OHLCV data from a directory, given a pair and a timeframe.
pair = "ETH_BUSD"
tf = "15m"
data = read_ohlcv_json_from_dir(Path("tests/testdata/"), pair=pair, timeframe=tf)

# Read a JSON file that is not necessarily an OHLCV file. The result is stored under its own
# name so that `data` keeps the OHLCV columns (e.g. "close") used by the following sections.
# NOTE(review): 'time' is presumably the index column and the list the columns to read —
# confirm against read_json_file.
meteo_data = read_json_file(Path("tests/testdata/meteo.json"), 'time', ["time", "Temperature"])

# -------------------------------------------------------

Indicator Module

# ------------------- Indicators module -----------------------------
from mlcf.indicators.add_indicators import add_intern_indicator

# You can add your own indicators or features yourself, e.g. the one-period change of the close.
close_prices = data["close"]
data["return"] = close_prices.pct_change(periods=1)
# Make sure to drop the rows that contain NaN values.
data.dropna(inplace=True)

# You can also add an intern indicator, selected by its name.
data = add_intern_indicator(data, indice_name="adx")
# -------------------------------------------------------

Label Tool

# ------------------- Labelize Tool -----------------------------
from mlcf.datatools.utils import labelize

# A good practice is to build the bounds from the mean and the standard deviation of the values
# you want to labelize.
returns = data["return"]
mean = returns.mean()
std = returns.std()
return_bounds = (mean - std, mean + std)

# `column` gives the values to labelize (here 'return'). The new labels column is named after
# the value given to `label_col_name`.
data = labelize(
    data,
    column="return",
    labels=5,
    bounds=return_bounds,
    label_col_name="label",
)

Data Intervals Module, Standardization Tools and WindowFilter Tool

# ------------------- Data Intervals Module and Standardization Tools -----------------------------
from mlcf.datatools.data_intervals import DataIntervals
from mlcf.datatools.standardize_fct import ClassicStd, MinMaxStd
from mlcf.datatools.windowing.filter import LabelBalanceFilter

# Map each column to the standardization that must be applied to it.
std_by_features = {}
std_by_features["close"] = ClassicStd()
# The returns are not centered, to avoid shifting them.
std_by_features["return"] = ClassicStd(with_mean=False)
# The values observed in the adx are between 0 and 100 and we want to set them between 0 and 1.
std_by_features["adx"] = MinMaxStd(minmax=(0, 100))

data_intervals = DataIntervals.create_data_intervals_obj(data, n_intervals=10)
data_intervals.standardize(std_by_features)

# A filter can be applied to the datasets we want. Here the 'train' set is filtered in order to
# balance the histogram of the return values, using the label previously computed on 'return'.
filter_by_set = {}
# 'label' is the column on which the data is balanced; the max count is processed automatically.
filter_by_set["train"] = LabelBalanceFilter("label")

# dict_train_val_test is a dict with the keys 'train', 'val' and 'test'. Each value of the dict
# is a WTSeries (a windowed time series).
dict_train_val_test = data_intervals.windowing(
    window_width=30,
    window_step=1,
    selected_columns=["close", "return", "adx"],
    filter_by_dataset=filter_by_set,
)
# -------------------------------------------------------

Window Iterator Tool

# -------------------- Window Iterator Tool --------------------

# If we don't want to use the Data Intervals Module, we can simply build a WTSeries
# directly from our data.
from mlcf.datatools.windowing.tseries import WTSeriesLite

# To create a WTSeries from a pandas.DataFrame:
wtseries = WTSeriesLite.create_wtseries_lite(
    dataframe=data,
    window_width=30,
    window_step=1,
    selected_columns=["close", "return", "adx"],
    window_filter=LabelBalanceFilter("label")
)

# Or from a wtseries .h5 file. The path is relative to the working directory, like the other
# test-data paths of this file (a leading "/" would anchor it at the filesystem root).
wtseries = WTSeriesLite.read(Path("tests/testdata/wtseries.h5"))

# We can save the wtseries as a file (same relative test-data directory).
wtseries.write(Path("tests/testdata", "wtseries"))

# We can iterate over the wtseries:
for window in wtseries:
    pass
    # Where window is a pd.DataFrame representing a window.

# -------------------------------------------------------

Forecast Window Iterator Tool

# -------------------- Forecast Window Iterator Tool --------------------

# This class allows us to iterate over a WTSeries, but each item of the iteration
# (__getitem__) is a tuple of 2 elements: an input window and a target window.

from mlcf.datatools.windowing.forecast_iterator import WindowForecastIterator

# The sum of input_width and target_width must not exceed the window width of the wtseries.
input_columns = ["close", "adx"]
target_columns = ["return"]
data_train = WindowForecastIterator(
    wtseries,
    input_width=29,
    target_width=1,
    input_features=input_columns,
    target_features=target_columns,
)
for window in data_train:
    # Each item unpacks into the input part and the target part of the window.
    window_input, window_target = window
# -------------------------------------------------------

Keywords

FAQs

What is mlcf?

Is mlcf well maintained?

Did you know?

Socket for GitHub automatically highlights issues in each pull request and monitors the health of all your open source dependencies. Discover the contents of your packages and block harmful activity before you install or update your dependencies.

Install

mlcf

MLCF - Machine Learning Toolkit for Cryptocurrency Forecasting

Installation

Installation for Linux (python v3.7)

Installation for Linux (python v3.8, v3.9)

MLCF example module usage

File reader module

Indicator Module

Label Tool

Data Intervals Module, Standardization Tools and WindowFilter Tool

Window Iterator Tool

Forecast Window Iterator Tool

Keywords

Related posts

9 Malicious NuGet Packages Deliver Time-Delayed Destructive Payloads

How Enterprise Security Is Adapting to AI-Accelerated Threats