arkas - npm Package Compare versions

+83

src/arkas/analyzer/continuous_column.py

		r"""Implement an analyzer that analyzes a column with continuous
		values."""

		from __future__ import annotations

		__all__ = ["ContinuousColumnAnalyzer"]

		import logging
		from typing import TYPE_CHECKING, Any

		from coola import objects_are_equal
		from coola.utils.format import repr_mapping_line

		from arkas.analyzer.lazy import BaseLazyAnalyzer
		from arkas.output.continuous_series import ContinuousSeriesOutput
		from arkas.state.series import SeriesState

		if TYPE_CHECKING:
		import polars as pl

		from arkas.figure import BaseFigureConfig

		logger = logging.getLogger(__name__)


		class ContinuousColumnAnalyzer(BaseLazyAnalyzer):
		    r"""Implement an analyzer that analyzes a column with continuous
		    values.

		    Args:
		        column: The column to analyze.
		        figure_config: The figure configuration.

		    Example usage:

		    ```pycon

		    >>> import polars as pl
		    >>> from arkas.analyzer import ContinuousColumnAnalyzer
		    >>> analyzer = ContinuousColumnAnalyzer(column="col1")
		    >>> analyzer
		    ContinuousColumnAnalyzer(column='col1', figure_config=None)
		    >>> frame = pl.DataFrame(
		    ...     {
		    ...         "col1": [0, 1, 0, 1],
		    ...         "col2": [1, 0, 1, 0],
		    ...         "col3": [1, 1, 1, 1],
		    ...     },
		    ...     schema={"col1": pl.Int64, "col2": pl.Int64, "col3": pl.Int64},
		    ... )
		    >>> output = analyzer.analyze(frame)
		    >>> output
		    ContinuousSeriesOutput(
		    (state): SeriesState(name='col1', values=(4,), figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    def __init__(self, column: str, figure_config: BaseFigureConfig | None = None) -> None:
		        self._column = column
		        self._figure_config = figure_config

		    def __repr__(self) -> str:
		        # The representation is built from the same mapping as ``get_args``.
		        args = repr_mapping_line(self.get_args())
		        return f"{self.__class__.__qualname__}({args})"

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        r"""Indicate if ``other`` is an analyzer of the same class with
		        equal arguments.

		        Args:
		            other: The object to compare with.
		            equal_nan: Forwarded to ``objects_are_equal``.

		        Returns:
		            ``True`` if the two analyzers are equal, otherwise ``False``.
		        """
		        if not isinstance(other, self.__class__):
		            return False
		        return objects_are_equal(self.get_args(), other.get_args(), equal_nan=equal_nan)

		    def get_args(self) -> dict:
		        r"""Return the constructor arguments as a dictionary.

		        Returns:
		            A dictionary with the keys ``'column'`` and
		            ``'figure_config'``.
		        """
		        return {"column": self._column, "figure_config": self._figure_config}

		    def _analyze(self, frame: pl.DataFrame) -> ContinuousSeriesOutput:
		        # Only the configured column is extracted from the DataFrame.
		        logger.info(f"Analyzing the continuous distribution of column {self._column!r}...")
		        return ContinuousSeriesOutput(
		            state=SeriesState(
		                series=frame[self._column],
		                figure_config=self._figure_config,
		            )
		        )

+98

src/arkas/analyzer/null_value.py

		r"""Implement an analyzer that counts the null values in each column."""

		from __future__ import annotations

		__all__ = ["NullValueAnalyzer"]

		import logging
		from typing import TYPE_CHECKING

		from grizz.utils.format import str_shape_diff

		from arkas.analyzer.lazy import BaseInNLazyAnalyzer
		from arkas.output.null_value import NullValueOutput
		from arkas.state.null_value import NullValueState

		if TYPE_CHECKING:
		from collections.abc import Sequence

		import polars as pl

		from arkas.figure import BaseFigureConfig

		logger = logging.getLogger(__name__)


		class NullValueAnalyzer(BaseInNLazyAnalyzer):
		    r"""Implement an analyzer that analyzes the number of null values
		    in each column.

		    Args:
		        columns: The columns to analyze. If ``None``, it analyzes all
		            the columns.
		        exclude_columns: The columns to exclude from the input
		            ``columns``. If any column is not found, it will be ignored
		            during the filtering process.
		        missing_policy: The policy on how to handle missing columns.
		            The following options are available: ``'ignore'``,
		            ``'warn'``, and ``'raise'``. If ``'raise'``, an exception
		            is raised if at least one column is missing.
		            If ``'warn'``, a warning is raised if at least one column
		            is missing and the missing columns are ignored.
		            If ``'ignore'``, the missing columns are ignored and
		            no warning message appears.
		        figure_config: The figure configuration.

		    Example usage:

		    ```pycon

		    >>> import polars as pl
		    >>> from arkas.analyzer import NullValueAnalyzer
		    >>> analyzer = NullValueAnalyzer()
		    >>> analyzer
		    NullValueAnalyzer(columns=None, exclude_columns=(), missing_policy='raise', figure_config=None)
		    >>> frame = pl.DataFrame(
		    ...     {
		    ...         "col1": [0, 1, 1, 0, 0, 1, None],
		    ...         "col2": [0, 1, None, None, 0, 1, 0],
		    ...         "col3": [None, 0, 0, 0, None, 1, None],
		    ...     }
		    ... )
		    >>> output = analyzer.analyze(frame)
		    >>> output
		    NullValueOutput(
		    (state): NullValueState(num_columns=3, figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    def __init__(
		        self,
		        columns: Sequence[str] | None = None,
		        exclude_columns: Sequence[str] = (),
		        missing_policy: str = "raise",
		        figure_config: BaseFigureConfig | None = None,
		    ) -> None:
		        super().__init__(
		            columns=columns,
		            exclude_columns=exclude_columns,
		            missing_policy=missing_policy,
		        )
		        self._figure_config = figure_config

		    def get_args(self) -> dict:
		        r"""Return the constructor arguments as a dictionary.

		        Returns:
		            The arguments of the parent class extended with the
		            ``'figure_config'`` key.
		        """
		        return super().get_args() | {
		            "figure_config": self._figure_config,
		        }

		    def _analyze(self, frame: pl.DataFrame) -> NullValueOutput:
		        logger.info(f"Plotting the content of {len(self.find_columns(frame)):,} columns...")
		        # Restrict the DataFrame to the selected columns before building the state.
		        columns = self.find_common_columns(frame)
		        dataframe = frame.select(columns)
		        logger.info(str_shape_diff(orig=frame.shape, final=dataframe.shape))
		        return NullValueOutput(
		            state=NullValueState.from_dataframe(
		                dataframe=dataframe, figure_config=self._figure_config
		            )
		        )

+128

src/arkas/analyzer/temporal_null_value.py

		r"""Implement an analyzer of the temporal distribution of null values."""

		from __future__ import annotations

		__all__ = ["TemporalNullValueAnalyzer"]

		import logging
		from typing import TYPE_CHECKING

		from grizz.utils.format import str_shape_diff

		from arkas.analyzer.lazy import BaseInNLazyAnalyzer
		from arkas.output.temporal_null_value import TemporalNullValueOutput
		from arkas.state.temporal_dataframe import TemporalDataFrameState

		if TYPE_CHECKING:
		from collections.abc import Sequence

		import polars as pl

		from arkas.figure import BaseFigureConfig

		logger = logging.getLogger(__name__)


		class TemporalNullValueAnalyzer(BaseInNLazyAnalyzer):
		    r"""Implement an analyzer that analyzes the number of null values in
		    a DataFrame over time.

		    Args:
		        temporal_column: The temporal column in the DataFrame.
		        period: The temporal period e.g. monthly or daily.
		        columns: The columns to analyze. If ``None``, it analyzes all
		            the columns.
		        exclude_columns: The columns to exclude from the input
		            ``columns``. If any column is not found, it will be ignored
		            during the filtering process.
		        missing_policy: The policy on how to handle missing columns.
		            The following options are available: ``'ignore'``,
		            ``'warn'``, and ``'raise'``. If ``'raise'``, an exception
		            is raised if at least one column is missing.
		            If ``'warn'``, a warning is raised if at least one column
		            is missing and the missing columns are ignored.
		            If ``'ignore'``, the missing columns are ignored and
		            no warning message appears.
		        figure_config: The figure configuration.

		    Example usage:

		    ```pycon

		    >>> from datetime import datetime, timezone
		    >>> import polars as pl
		    >>> from arkas.analyzer import TemporalNullValueAnalyzer
		    >>> analyzer = TemporalNullValueAnalyzer(temporal_column="datetime", period="1d")
		    >>> analyzer
		    TemporalNullValueAnalyzer(columns=None, exclude_columns=(), missing_policy='raise', temporal_column='datetime', period='1d', figure_config=None)
		    >>> frame = pl.DataFrame(
		    ...     {
		    ...         "col1": [0, 1, 1, 0],
		    ...         "col2": [0, 1, 0, 1],
		    ...         "col3": [1, 0, 0, 0],
		    ...         "datetime": [
		    ...             datetime(year=2020, month=1, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=2, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=3, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=4, day=3, tzinfo=timezone.utc),
		    ...         ],
		    ...     },
		    ...     schema={
		    ...         "col1": pl.Int64,
		    ...         "col2": pl.Int64,
		    ...         "col3": pl.Int64,
		    ...         "datetime": pl.Datetime(time_unit="us", time_zone="UTC"),
		    ...     },
		    ... )
		    >>> output = analyzer.analyze(frame)
		    >>> output
		    TemporalNullValueOutput(
		    (state): TemporalDataFrameState(dataframe=(4, 4), temporal_column='datetime', period='1d', figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    def __init__(
		        self,
		        temporal_column: str,
		        period: str,
		        columns: Sequence[str] | None = None,
		        exclude_columns: Sequence[str] = (),
		        missing_policy: str = "raise",
		        figure_config: BaseFigureConfig | None = None,
		    ) -> None:
		        super().__init__(
		            columns=columns,
		            exclude_columns=exclude_columns,
		            missing_policy=missing_policy,
		        )
		        self._temporal_column = temporal_column
		        self._period = period
		        self._figure_config = figure_config

		    def get_args(self) -> dict:
		        r"""Return the constructor arguments as a dictionary.

		        Returns:
		            The arguments of the parent class extended with the
		            ``'temporal_column'``, ``'period'`` and ``'figure_config'``
		            keys.
		        """
		        return super().get_args() | {
		            "temporal_column": self._temporal_column,
		            "period": self._period,
		            "figure_config": self._figure_config,
		        }

		    def _analyze(self, frame: pl.DataFrame) -> TemporalNullValueOutput:
		        logger.info(
		            f"Plotting the number of null values of {len(self.find_columns(frame)):,} columns "
		            f"using the temporal column {self._temporal_column!r} and period {self._period!r}..."
		        )
		        columns = list(self.find_common_columns(frame))
		        # The temporal column must be part of the state even when it is not
		        # one of the selected columns.
		        if self._temporal_column not in columns:
		            columns.append(self._temporal_column)
		        dataframe = frame.select(columns)
		        logger.info(str_shape_diff(orig=frame.shape, final=dataframe.shape))
		        return TemporalNullValueOutput(
		            state=TemporalDataFrameState(
		                dataframe=dataframe,
		                temporal_column=self._temporal_column,
		                period=self._period,
		                figure_config=self._figure_config,
		            )
		        )

+236

src/arkas/content/continuous_series.py

		r"""Contain the implementation of a HTML content generator that analyzes
		a Series with continuous values."""

		from __future__ import annotations

		__all__ = ["ContinuousSeriesContentGenerator", "create_template"]

		import logging
		from typing import TYPE_CHECKING, Any

		from coola.utils import repr_indent, repr_mapping, str_indent, str_mapping
		from jinja2 import Template

		from arkas.content.section import BaseSectionContentGenerator
		from arkas.figure.utils import figure2html
		from arkas.plotter.continuous_series import ContinuousSeriesPlotter
		from arkas.utils.range import find_range
		from arkas.utils.stats import compute_statistics_continuous

		if TYPE_CHECKING:
		from arkas.state.series import SeriesState


		logger = logging.getLogger(__name__)


		class ContinuousSeriesContentGenerator(BaseSectionContentGenerator):
		    r"""Implement a content generator that analyzes a Series with
		    continuous values.

		    Args:
		        state: The state containing the Series to analyze.

		    Example usage:

		    ```pycon

		    >>> import polars as pl
		    >>> from arkas.content import ContinuousSeriesContentGenerator
		    >>> from arkas.state import SeriesState
		    >>> content = ContinuousSeriesContentGenerator(
		    ...     SeriesState(pl.Series("col1", [1, 2, 3, 4, 5, 6, 7]))
		    ... )
		    >>> content
		    ContinuousSeriesContentGenerator(
		    (state): SeriesState(name='col1', values=(7,), figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    def __init__(self, state: SeriesState) -> None:
		        self._state = state

		    def __repr__(self) -> str:
		        args = repr_indent(repr_mapping({"state": self._state}))
		        return f"{self.__class__.__qualname__}(\n {args}\n)"

		    def __str__(self) -> str:
		        args = str_indent(str_mapping({"state": self._state}))
		        return f"{self.__class__.__qualname__}(\n {args}\n)"

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        r"""Indicate if ``other`` is a generator of the same class with
		        an equal state.

		        Args:
		            other: The object to compare with.
		            equal_nan: Forwarded to the state comparison.

		        Returns:
		            ``True`` if the two generators are equal, otherwise
		            ``False``.
		        """
		        if not isinstance(other, self.__class__):
		            return False
		        return self._state.equal(other._state, equal_nan=equal_nan)

		    def generate_content(self) -> str:
		        r"""Generate the HTML content of the section.

		        Returns:
		            The rendered HTML with the summary, the histogram and the
		            table of statistics.
		        """
		        logger.info(f"Generating the continuous distribution of {self._state.series.name}...")
		        figures = ContinuousSeriesPlotter(state=self._state).plot()
		        stats = compute_statistics_continuous(self._state.series)
		        # Avoid a division by zero when the series has no value.
		        null_values_pct = (
		            f"{100 * stats['num_nulls'] / stats['count']:.2f}" if stats["count"] > 0 else "N/A"
		        )
		        # The upper bound must come from the "xmax" figure argument; reading
		        # "xmin" twice made the reported range ignore the configured maximum.
		        xmin, xmax = find_range(
		            self._state.series.drop_nulls().to_numpy(),
		            xmin=self._state.figure_config.get_arg("xmin"),
		            xmax=self._state.figure_config.get_arg("xmax"),
		        )
		        return Template(create_template()).render(
		            {
		                "column": self._state.series.name,
		                "figure": figure2html(figures["continuous_histogram"], close_fig=True),
		                "table": create_table(stats),
		                "total_values": f"{stats['count']:,}",
		                "unique_values": f"{stats['nunique']:,}",
		                "null_values": f"{stats['num_nulls']:,}",
		                "null_values_pct": null_values_pct,
		                "min_value": f"{stats['min']:,}",
		                "max_value": f"{stats['max']:,}",
		                "xmin": f"{xmin:,}",
		                "xmax": f"{xmax:,}",
		                "dtype": str(self._state.series.dtype),
		            }
		        )


		def create_template() -> str:
		    r"""Return the template of the content.

		    The template is a Jinja2 template with the placeholders
		    ``column``, ``total_values``, ``unique_values``, ``null_values``,
		    ``null_values_pct``, ``min_value``, ``max_value``, ``dtype``,
		    ``xmin``, ``xmax``, ``figure`` and ``table``.

		    Returns:
		        The content template.

		    Example usage:

		    ```pycon

		    >>> from arkas.content.continuous_series import create_template
		    >>> template = create_template()

		    ```
		    """
		    # The last ``<em>`` tag is explicitly closed; an unclosed tag would
		    # italicize the remainder of the section.
		    return """<p>This section analyzes the distribution of continuous values for column <em>{{column}}</em>.</p>
		<ul>
		<li> <b>total values:</b> {{total_values}} </li>
		<li> <b>number of unique values:</b> {{unique_values}} </li>
		<li> <b>number of null values:</b> {{null_values}} / {{total_values}} ({{null_values_pct}}%) </li>
		<li> <b>range of values:</b> [{{min_value}}, {{max_value}}] </li>
		<li> <b>data type:</b> <em>{{dtype}}</em> </li>
		</ul>

		<p>The histogram shows the distribution of values in the range [{{xmin}}, {{xmax}}].</p>
		{{figure}}

		<details>
		<summary>[show statistics]</summary>
		<p style="margin-top: 1rem;">
		The following table shows some statistics about the distribution for column <em>{{column}}</em>.
		</p>
		{{table}}
		</details>
		"""


		def create_table(stats: dict) -> str:
		    r"""Create the HTML code of the table with statistics.

		    Args:
		        stats: Specifies a dictionary with the statistics. The keys
		            read by this function are ``'count'``, ``'mean'``,
		            ``'std'``, ``'skewness'``, ``'kurtosis'``, ``'min'``,
		            ``'q001'``, ``'q01'``, ``'q05'``, ``'q10'``, ``'q25'``,
		            ``'median'``, ``'q75'``, ``'q90'``, ``'q95'``, ``'q99'``,
		            ``'q999'``, ``'max'``, ``'>0'``, ``'<0'`` and ``'=0'``.

		    Returns:
		        The HTML code of the table.

		    Example usage:

		    ```pycon

		    >>> from arkas.content.continuous_series import create_table
		    >>> table = create_table(
		    ...     stats={
		    ...         "count": 101,
		    ...         "nunique": 101,
		    ...         "num_non_nulls": 101,
		    ...         "num_nulls": 0,
		    ...         "mean": 50.0,
		    ...         "std": 29.15,
		    ...         "skewness": 0.0,
		    ...         "kurtosis": -1.20,
		    ...         "min": 0.0,
		    ...         "q001": 0.1,
		    ...         "q01": 1.0,
		    ...         "q05": 5.0,
		    ...         "q10": 10.0,
		    ...         "q25": 25.0,
		    ...         "median": 50.0,
		    ...         "q75": 75.0,
		    ...         "q90": 90.0,
		    ...         "q95": 95.0,
		    ...         "q99": 99.0,
		    ...         "q999": 99.9,
		    ...         "max": 100.0,
		    ...         ">0": 100,
		    ...         "<0": 0,
		    ...         "=0": 1,
		    ...     },
		    ... )

		    ```
		    """
		    # The 0.1% and 99.9% rows use their own placeholders (q001/q999);
		    # they previously repeated the 1% and 99% quantiles.
		    return Template(
		        """<table class="table table-hover table-responsive w-auto" >
		<thead class="thead table-group-divider">
		<tr><th>stat</th><th>value</th></tr>
		</thead>
		<tbody class="tbody table-group-divider">
		<tr><th>count</th><td {{num_style}}>{{count}}</td></tr>
		<tr><th>mean</th><td {{num_style}}>{{mean}}</td></tr>
		<tr><th>std</th><td {{num_style}}>{{std}}</td></tr>
		<tr><th>skewness</th><td {{num_style}}>{{skewness}}</td></tr>
		<tr><th>kurtosis</th><td {{num_style}}>{{kurtosis}}</td></tr>
		<tr><th>min</th><td {{num_style}}>{{min}}</td></tr>
		<tr><th>quantile 0.1%</th><td {{num_style}}>{{q001}}</td></tr>
		<tr><th>quantile 1%</th><td {{num_style}}>{{q01}}</td></tr>
		<tr><th>quantile 5%</th><td {{num_style}}>{{q05}}</td></tr>
		<tr><th>quantile 10%</th><td {{num_style}}>{{q10}}</td></tr>
		<tr><th>quantile 25%</th><td {{num_style}}>{{q25}}</td></tr>
		<tr><th>median</th><td {{num_style}}>{{median}}</td></tr>
		<tr><th>quantile 75%</th><td {{num_style}}>{{q75}}</td></tr>
		<tr><th>quantile 90%</th><td {{num_style}}>{{q90}}</td></tr>
		<tr><th>quantile 95%</th><td {{num_style}}>{{q95}}</td></tr>
		<tr><th>quantile 99%</th><td {{num_style}}>{{q99}}</td></tr>
		<tr><th>quantile 99.9%</th><td {{num_style}}>{{q999}}</td></tr>
		<tr><th>max</th><td {{num_style}}>{{max}}</td></tr>
		<tr><th>number of zeros</th><td {{num_style}}>{{num_zeros}}</td></tr>
		<tr><th>number of positive values</th><td {{num_style}}>{{num_pos}}</td></tr>
		<tr><th>number of negative values</th><td {{num_style}}>{{num_neg}}</td></tr>
		<tr class="table-group-divider"></tr>
		</tbody>
		</table>
		"""
		    ).render(
		        {
		            "num_style": 'style="text-align: right;"',
		            "count": f"{stats['count']:,}",
		            "mean": f"{stats['mean']:,.4f}",
		            "std": f"{stats['std']:,.4f}",
		            "skewness": f"{stats['skewness']:,.4f}",
		            "kurtosis": f"{stats['kurtosis']:,.4f}",
		            "min": f"{stats['min']:,.4f}",
		            "q001": f"{stats['q001']:,.4f}",
		            "q01": f"{stats['q01']:,.4f}",
		            "q05": f"{stats['q05']:,.4f}",
		            "q10": f"{stats['q10']:,.4f}",
		            "q25": f"{stats['q25']:,.4f}",
		            "median": f"{stats['median']:,.4f}",
		            "q75": f"{stats['q75']:,.4f}",
		            "q90": f"{stats['q90']:,.4f}",
		            "q95": f"{stats['q95']:,.4f}",
		            "q99": f"{stats['q99']:,.4f}",
		            "q999": f"{stats['q999']:,.4f}",
		            "max": f"{stats['max']:,.4f}",
		            "num_pos": f"{stats['>0']:,}",
		            "num_neg": f"{stats['<0']:,}",
		            "num_zeros": f"{stats['=0']:,}",
		        }
		    )

+232

src/arkas/content/null_value.py

		r"""Contain the implementation of a HTML content generator that analyzes
		the number of null values per column."""

		from __future__ import annotations

		__all__ = ["NullValueContentGenerator", "create_template"]

		import logging
		from typing import TYPE_CHECKING, Any

		from coola.utils import repr_indent, repr_mapping, str_indent, str_mapping
		from jinja2 import Template

		from arkas.content.section import BaseSectionContentGenerator
		from arkas.figure.utils import figure2html
		from arkas.plotter.null_value import NullValuePlotter

		if TYPE_CHECKING:
		import polars as pl

		from arkas.state.null_value import NullValueState

		logger = logging.getLogger(__name__)


		class NullValueContentGenerator(BaseSectionContentGenerator):
		    r"""Generate the HTML section reporting the number of null values
		    per column.

		    Args:
		        state: The state containing the number of null values per
		            column.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from arkas.content import NullValueContentGenerator
		    >>> from arkas.state import NullValueState
		    >>> content = NullValueContentGenerator(
		    ...     NullValueState(
		    ...         null_count=np.array([0, 1, 2]),
		    ...         total_count=np.array([5, 5, 5]),
		    ...         columns=["col1", "col2", "col3"],
		    ...     )
		    ... )
		    >>> content
		    NullValueContentGenerator(
		    (state): NullValueState(num_columns=3, figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    def __init__(self, state: NullValueState) -> None:
		        self._state = state

		    def __repr__(self) -> str:
		        name = self.__class__.__qualname__
		        body = repr_indent(repr_mapping({"state": self._state}))
		        return f"{name}(\n {body}\n)"

		    def __str__(self) -> str:
		        name = self.__class__.__qualname__
		        body = str_indent(str_mapping({"state": self._state}))
		        return f"{name}(\n {body}\n)"

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        r"""Compare this generator with another object.

		        Args:
		            other: The object to compare with.
		            equal_nan: Forwarded to the state comparison.

		        Returns:
		            ``True`` if ``other`` is an instance of the same class
		            with an equal state, otherwise ``False``.
		        """
		        if isinstance(other, self.__class__):
		            return self._state.equal(other._state, equal_nan=equal_nan)
		        return False

		    def generate_content(self) -> str:
		        r"""Render the HTML content of the section.

		        Returns:
		            The HTML with the bar plot and the two tables (sorted by
		            column name and by number of null values).
		        """
		        state = self._state
		        ncols = len(state.columns)
		        logger.info(f"Generating the null values bar plot for {ncols:,} columns...")
		        figures = NullValuePlotter(state=state).plot()
		        frame = state.to_dataframe()
		        template = Template(create_template())
		        context = {
		            "ncols": f"{ncols:,}",
		            "columns": ", ".join(state.columns),
		            "figure": figure2html(figures["null_values"], close_fig=True),
		            "table_alpha": create_table(frame.sort(by="column")),
		            "table_sort": create_table(frame.sort(by="null")),
		        }
		        return template.render(context)


		def create_template() -> str:
		    r"""Return the template of the content.

		    The template is a Jinja2 template with the placeholders
		    ``ncols``, ``columns``, ``figure``, ``table_alpha`` and
		    ``table_sort``.

		    Returns:
		        The content template.

		    Example usage:

		    ```pycon

		    >>> from arkas.content.null_value import create_template
		    >>> template = create_template()

		    ```
		    """
		    # Both column counts use the ``ncols`` placeholder, which is the only
		    # count supplied at render time; ``num_columns`` was never provided
		    # and rendered as an empty string.
		    return """This section analyzes the number and proportion of null values for the {{ncols}}
		columns: <em>{{columns}}</em>.

		<p>The columns are sorted by ascending order of number of null values in the following bar plot.</p>

		{{figure}}

		<details>
		<summary>[show statistics per column]</summary>

		<p style="margin-top: 1rem;">
		The following tables show the number and proportion of null values for the {{ncols}}
		columns.
		The background color of the row indicates the proportion of missing values:
		dark blues indicates more missing values than light blues. </p>

		<ul>
		<li> <b>column</b>: is the column name </li>
		<li> <b>null pct</b>: is the percentage of null values in the column </li>
		<li> <b>null count</b>: is the number of null values in the column </li>
		<li> <b>total count</b>: is the total number of values in the column </li>
		</ul>

		<div class="container-fluid">
		<div class="row align-items-start">
		<div class="col align-self-center">
		<p><b>Columns sorted by alphabetical order</b></p>
		{{table_alpha}}
		</div>
		<div class="col">
		<p><b>Columns sorted by ascending order of missing values</b></p>
		{{table_sort}}
		</div>
		</div>
		</div>
		</details>
		"""


		def create_table(frame: pl.DataFrame) -> str:
		    r"""Return the HTML code of a table with the number of null values
		    per column.

		    Args:
		        frame: The DataFrame to analyze. The columns ``'column'``,
		            ``'null'`` and ``'total'`` are read, and one table row is
		            generated per DataFrame row.

		    Returns:
		        The HTML code of the table.

		    Example usage:

		    ```pycon

		    >>> import polars as pl
		    >>> from arkas.content.null_value import create_table
		    >>> frame = pl.DataFrame(
		    ...     {"column": ["A", "B", "C"], "null": [0, 1, 2], "total": [4, 4, 4]},
		    ...     schema={"column": pl.String, "null": pl.Int64, "total": pl.Int64},
		    ... )
		    >>> table = create_table(frame)

		    ```
		    """
		    records = zip(frame["column"], frame["null"], frame["total"])
		    html_rows = "\n".join(
		        create_table_row(column=name, null_count=num_null, total_count=num_total)
		        for name, num_null, num_total in records
		    )
		    template = Template(
		        """<table class="table table-hover table-responsive w-auto" >
		<thead class="thead table-group-divider">
		<tr>
		<th>column</th>
		<th>null pct</th>
		<th>null count</th>
		<th>total count</th>
		</tr>
		</thead>
		<tbody class="tbody table-group-divider">
		{{rows}}
		<tr class="table-group-divider"></tr>
		</tbody>
		</table>
		"""
		    )
		    return template.render({"rows": html_rows})


		def create_table_row(column: str, null_count: int, total_count: int) -> str:
		    r"""Create the HTML code of a new table row.

		    The background color of each cell uses the proportion of null
		    values as alpha channel. When ``total_count`` is not positive,
		    the proportion is displayed as ``nan`` and the alpha channel is
		    set to 0.

		    Args:
		        column: The column name.
		        null_count: The number of null values.
		        total_count: The total number of rows.

		    Returns:
		        The HTML code of a row.

		    Example usage:

		    ```pycon

		    >>> from arkas.content.null_value import create_table_row
		    >>> row = create_table_row(column="col", null_count=5, total_count=101)

		    ```
		    """
		    if total_count > 0:
		        pct = null_count / total_count
		        pct_color = pct
		    else:
		        pct = float("nan")
		        pct_color = 0
		    return Template(
		        "<tr>"
		        # The header cell uses the sanitized alpha value instead of the
		        # displayed percentage, because "nan" is not a valid CSS alpha.
		        '<th style="background-color: rgba(0, 191, 255, {{th_alpha}})">{{column}}</th>'
		        "<td {{num_style}}>{{null_pct}}</td>"
		        "<td {{num_style}}>{{null_count}}</td>"
		        "<td {{num_style}}>{{total_count}}</td>"
		        "</tr>"
		    ).render(
		        {
		            "num_style": (
		                f'style="text-align: right; background-color: rgba(0, 191, 255, {pct_color})"'
		            ),
		            "column": column,
		            "null_count": f"{null_count:,}",
		            "null_pct": f"{pct:.4f}",
		            # Same 4-decimal formatting as before for the header alpha.
		            "th_alpha": f"{pct_color:.4f}",
		            "total_count": f"{total_count:,}",
		        }
		    )

+251

src/arkas/content/temporal_null_value.py

		r"""Contain the implementation of a HTML content generator that analyzes
		the temporal distribution of null values."""

		from __future__ import annotations

		__all__ = ["TemporalNullValueContentGenerator", "create_template"]

		import logging
		from typing import TYPE_CHECKING, Any

		from coola.utils import repr_indent, repr_mapping, str_indent, str_mapping
		from grizz.utils.null import compute_temporal_null_count
		from jinja2 import Template

		from arkas.content.section import BaseSectionContentGenerator
		from arkas.figure.utils import figure2html
		from arkas.plotter.temporal_null_value import TemporalNullValuePlotter

		if TYPE_CHECKING:
		import polars as pl

		from arkas.state.temporal_dataframe import TemporalDataFrameState


		logger = logging.getLogger(__name__)


		class TemporalNullValueContentGenerator(BaseSectionContentGenerator):
		    r"""Generate the HTML section reporting the temporal distribution
		    of null values.

		    Args:
		        state: The state containing the DataFrame to analyze.

		    Example usage:

		    ```pycon

		    >>> from datetime import datetime, timezone
		    >>> import polars as pl
		    >>> from arkas.content import TemporalNullValueContentGenerator
		    >>> from arkas.state import TemporalDataFrameState
		    >>> dataframe = pl.DataFrame(
		    ...     {
		    ...         "col1": [0, 1, 1, 0],
		    ...         "col2": [0, 1, 0, 1],
		    ...         "col3": [1, 0, 0, 0],
		    ...         "datetime": [
		    ...             datetime(year=2020, month=1, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=2, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=3, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=4, day=3, tzinfo=timezone.utc),
		    ...         ],
		    ...     },
		    ...     schema={
		    ...         "col1": pl.Int64,
		    ...         "col2": pl.Int64,
		    ...         "col3": pl.Int64,
		    ...         "datetime": pl.Datetime(time_unit="us", time_zone="UTC"),
		    ...     },
		    ... )
		    >>> content = TemporalNullValueContentGenerator(
		    ...     TemporalDataFrameState(dataframe, temporal_column="datetime")
		    ... )
		    >>> content
		    TemporalNullValueContentGenerator(
		    (state): TemporalDataFrameState(dataframe=(4, 4), temporal_column='datetime', period=None, figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    def __init__(self, state: TemporalDataFrameState) -> None:
		        self._state = state

		    def __repr__(self) -> str:
		        name = self.__class__.__qualname__
		        body = repr_indent(repr_mapping({"state": self._state}))
		        return f"{name}(\n {body}\n)"

		    def __str__(self) -> str:
		        name = self.__class__.__qualname__
		        body = str_indent(str_mapping({"state": self._state}))
		        return f"{name}(\n {body}\n)"

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        r"""Compare this generator with another object.

		        Args:
		            other: The object to compare with.
		            equal_nan: Forwarded to the state comparison.

		        Returns:
		            ``True`` if ``other`` is an instance of the same class
		            with an equal state, otherwise ``False``.
		        """
		        if isinstance(other, self.__class__):
		            return self._state.equal(other._state, equal_nan=equal_nan)
		        return False

		    def generate_content(self) -> str:
		        r"""Render the HTML content of the section.

		        Returns:
		            The HTML with the temporal figure and the table of
		            statistics per period.
		        """
		        state = self._state
		        nrows, ncols = state.dataframe.shape
		        logger.info(
		            f"Generating the temporal plot of {ncols} columns using the "
		            f"temporal column {state.temporal_column!r}..."
		        )
		        figures = TemporalNullValuePlotter(state=state).plot()
		        template = Template(create_template())
		        context = {
		            "nrows": f"{nrows:,}",
		            "ncols": f"{ncols:,}",
		            "columns": ", ".join(state.dataframe.columns),
		            "temporal_column": state.temporal_column,
		            "figure": figure2html(figures["temporal_null_value"], close_fig=True),
		            "table": create_table(
		                frame=state.dataframe,
		                temporal_column=state.temporal_column,
		                period=state.period,
		            ),
		        }
		        return template.render(context)


		def create_template() -> str:
		    r"""Build the template used to render the section.

		    The template is a Jinja2 template with the placeholders
		    ``temporal_column``, ``figure`` and ``table``.

		    Returns:
		        The content template.

		    Example usage:

		    ```pycon

		    >>> from arkas.content.temporal_null_value import create_template
		    >>> template = create_template()

		    ```
		    """
		    template = """<p>This section analyzes the temporal distribution of null values in all columns.
		The column <em>{{temporal_column}}</em> is used as the temporal column.</p>
		{{figure}}
		<details>
		<summary>[show statistics per temporal period]</summary>
		<p style="margin-top: 1rem;">The following table shows some statistics for each period.</p>
		{{table}}
		</details>
		"""
		    return template


		def create_table(frame: pl.DataFrame, temporal_column: str, period: str) -> str:
		    r"""Build the HTML table describing the temporal distribution of
		    null values.

		    Args:
		        frame: The DataFrame to analyze.
		        temporal_column: The temporal column used to analyze the
		            temporal distribution.
		        period: The temporal period e.g. monthly or daily.

		    Returns:
		        The HTML representation of the table, or an empty string if
		        the DataFrame is empty.

		    Example usage:

		    ```pycon

		    >>> from datetime import datetime, timezone
		    >>> import polars as pl
		    >>> from arkas.content.temporal_null_value import create_table
		    >>> frame = pl.DataFrame(
		    ...     {
		    ...         "col1": [None, 1.0, 0.0, 1.0],
		    ...         "col2": [None, 1, 0, None],
		    ...         "datetime": [
		    ...             datetime(year=2020, month=1, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=2, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=3, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=4, day=3, tzinfo=timezone.utc),
		    ...         ],
		    ...     },
		    ...     schema={
		    ...         "col1": pl.Float64,
		    ...         "col2": pl.Int64,
		    ...         "datetime": pl.Datetime(time_unit="us", time_zone="UTC"),
		    ...     },
		    ... )
		    >>> table = create_table(frame=frame, temporal_column="datetime", period="1mo")

		    ```
		    """
		    if frame.is_empty():
		        return ""

		    # Every column except the temporal one is analyzed.
		    value_columns = list(frame.columns)
		    value_columns.remove(temporal_column)
		    nulls, totals, labels = compute_temporal_null_count(
		        frame=frame, columns=value_columns, temporal_column=temporal_column, period=period
		    )
		    html_rows = [
		        create_table_row(label=label, num_nulls=null, total=total)
		        for label, null, total in zip(labels, nulls, totals)
		    ]
		    template = Template(
		        """<table class="table table-hover table-responsive w-auto" >
		<thead class="thead table-group-divider">
		<tr>
		<th>period</th>
		<th>number of null values</th>
		<th>number of non-null values</th>
		<th>total number of values</th>
		<th>percentage of null values</th>
		<th>percentage of non-null values</th>
		</tr>
		</thead>
		<tbody class="tbody table-group-divider">
		{{rows}}
		<tr class="table-group-divider"></tr>
		</tbody>
		</table>
		"""
		    )
		    return template.render({"rows": "\n".join(html_rows), "period": period})


		def create_table_row(label: str, num_nulls: int, total: int) -> str:
		    r"""Create the HTML code of a new table row.

		    Args:
		        label: The label of the row.
		        num_nulls: The number of null values.
		        total: The total number of values. If it is zero, the
		            percentages are shown as ``nan%`` instead of raising a
		            ``ZeroDivisionError``.

		    Returns:
		        The HTML code of a row.

		    Example usage:

		    ```pycon

		    >>> from arkas.content.temporal_null_value import create_table_row
		    >>> row = create_table_row(label="col", num_nulls=5, total=42)

		    ```
		    """
		    num_non_nulls = total - num_nulls
		    if total != 0:
		        nulls_pct = 100 * num_nulls / total
		        non_nulls_pct = 100 * num_non_nulls / total
		    else:
		        # A period without any value has no meaningful percentage.
		        nulls_pct = non_nulls_pct = float("nan")
		    return Template(
		        """<tr>
		<th>{{label}}</th>
		<td {{num_style}}>{{num_nulls}}</td>
		<td {{num_style}}>{{num_non_nulls}}</td>
		<td {{num_style}}>{{total}}</td>
		<td {{num_style}}>{{num_nulls_pct}}</td>
		<td {{num_style}}>{{num_non_nulls_pct}}</td>
		</tr>"""
		    ).render(
		        {
		            "num_style": 'style="text-align: right;"',
		            "label": label,
		            "num_nulls": f"{num_nulls:,}",
		            "num_non_nulls": f"{num_non_nulls:,}",
		            "total": f"{total:,}",
		            "num_nulls_pct": f"{nulls_pct:.2f}%",
		            "num_non_nulls_pct": f"{non_nulls_pct:.2f}%",
		        }
		    )

+75

src/arkas/output/continuous_series.py

		r"""Implement an output to analyze a series with continuous values."""

		from __future__ import annotations

		__all__ = ["ContinuousSeriesOutput"]

		from typing import TYPE_CHECKING, Any

		from coola.utils import repr_indent, repr_mapping, str_indent, str_mapping

		from arkas.content.continuous_series import ContinuousSeriesContentGenerator
		from arkas.evaluator2.vanilla import Evaluator
		from arkas.output.lazy import BaseLazyOutput
		from arkas.plotter.continuous_series import ContinuousSeriesPlotter

		if TYPE_CHECKING:
		from arkas.state.series import SeriesState


		class ContinuousSeriesOutput(BaseLazyOutput):
		    r"""Define the output used to analyze a series with continuous
		    values.

		    Args:
		        state: The state containing the Series to analyze.

		    Example usage:

		    ```pycon

		    >>> import polars as pl
		    >>> from arkas.output import ContinuousSeriesOutput
		    >>> from arkas.state import SeriesState
		    >>> output = ContinuousSeriesOutput(SeriesState(pl.Series("col1", [1, 2, 3, 4, 5, 6, 7])))
		    >>> output
		    ContinuousSeriesOutput(
		    (state): SeriesState(name='col1', values=(7,), figure_config=MatplotlibFigureConfig())
		    )
		    >>> output.get_content_generator()
		    ContinuousSeriesContentGenerator(
		    (state): SeriesState(name='col1', values=(7,), figure_config=MatplotlibFigureConfig())
		    )
		    >>> output.get_evaluator()
		    Evaluator(count=0)
		    >>> output.get_plotter()
		    ContinuousSeriesPlotter(
		    (state): SeriesState(name='col1', values=(7,), figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    def __init__(self, state: SeriesState) -> None:
		        self._state = state

		    def __repr__(self) -> str:
		        name = self.__class__.__qualname__
		        body = repr_indent(repr_mapping({"state": self._state}))
		        return f"{name}(\n {body}\n)"

		    def __str__(self) -> str:
		        name = self.__class__.__qualname__
		        body = str_indent(str_mapping({"state": self._state}))
		        return f"{name}(\n {body}\n)"

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        r"""Compare this output with another object.

		        Args:
		            other: The object to compare with.
		            equal_nan: Forwarded to the state comparison.

		        Returns:
		            ``True`` if ``other`` is an instance of the same class
		            with an equal state, otherwise ``False``.
		        """
		        if isinstance(other, self.__class__):
		            return self._state.equal(other._state, equal_nan=equal_nan)
		        return False

		    def _get_content_generator(self) -> ContinuousSeriesContentGenerator:
		        # The content generator shares the state of this output.
		        return ContinuousSeriesContentGenerator(state=self._state)

		    def _get_evaluator(self) -> Evaluator:
		        # An evaluator created without arguments is returned.
		        return Evaluator()

		    def _get_plotter(self) -> ContinuousSeriesPlotter:
		        # The plotter shares the state of this output.
		        return ContinuousSeriesPlotter(state=self._state)

+84

src/arkas/output/null_value.py

		r"""Implement an output to analyze the number of null values per
		column."""

		from __future__ import annotations

		__all__ = ["NullValueOutput"]

		from typing import TYPE_CHECKING, Any

		from coola.utils import repr_indent, repr_mapping, str_indent, str_mapping

		from arkas.content.null_value import NullValueContentGenerator
		from arkas.evaluator2.vanilla import Evaluator
		from arkas.output.lazy import BaseLazyOutput
		from arkas.plotter.null_value import NullValuePlotter

		if TYPE_CHECKING:
		from arkas.state.null_value import NullValueState


		class NullValueOutput(BaseLazyOutput):
		    r"""Output used to analyze the number of null values per column.

		    The output stores a single state and hands it to the content
		    generator and to the plotter it creates. The evaluator it
		    creates is built without any argument.

		    Args:
		        state: The state containing the number of null values per
		            column.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from arkas.output import NullValueOutput
		    >>> from arkas.state import NullValueState
		    >>> output = NullValueOutput(
		    ...     NullValueState(
		    ...         null_count=np.array([0, 1, 2]),
		    ...         total_count=np.array([5, 5, 5]),
		    ...         columns=["col1", "col2", "col3"],
		    ...     )
		    ... )
		    >>> output
		    NullValueOutput(
		     (state): NullValueState(num_columns=3, figure_config=MatplotlibFigureConfig())
		    )
		    >>> output.get_content_generator()
		    NullValueContentGenerator(
		     (state): NullValueState(num_columns=3, figure_config=MatplotlibFigureConfig())
		    )
		    >>> output.get_evaluator()
		    Evaluator(count=0)
		    >>> output.get_plotter()
		    NullValuePlotter(
		     (state): NullValueState(num_columns=3, figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    def __init__(self, state: NullValueState) -> None:
		        self._state = state

		    def __repr__(self) -> str:
		        # Single "(state): ..." entry rendered under the class name.
		        body = repr_indent(repr_mapping({"state": self._state}))
		        name = self.__class__.__qualname__
		        return f"{name}(\n {body}\n)"

		    def __str__(self) -> str:
		        # Same layout as ``__repr__`` but built with the ``str`` helpers.
		        body = str_indent(str_mapping({"state": self._state}))
		        name = self.__class__.__qualname__
		        return f"{name}(\n {body}\n)"

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        # Two outputs are equal when they share the class and their states
		        # compare equal; the NaN policy is delegated to the state.
		        if isinstance(other, self.__class__):
		            return self._state.equal(other._state, equal_nan=equal_nan)
		        return False

		    def _get_content_generator(self) -> NullValueContentGenerator:
		        return NullValueContentGenerator(self._state)

		    def _get_evaluator(self) -> Evaluator:
		        # The evaluator is created without any metric.
		        return Evaluator()

		    def _get_plotter(self) -> NullValuePlotter:
		        return NullValuePlotter(self._state)

+99

src/arkas/output/temporal_null_value.py

		r"""Implement an output to analyze the number of null values in a
		DataFrame."""

		from __future__ import annotations

		__all__ = ["TemporalNullValueOutput"]

		from typing import TYPE_CHECKING, Any

		from coola.utils import repr_indent, repr_mapping, str_indent, str_mapping

		from arkas.content.temporal_null_value import TemporalNullValueContentGenerator
		from arkas.evaluator2.vanilla import Evaluator
		from arkas.output.lazy import BaseLazyOutput
		from arkas.plotter.temporal_null_value import TemporalNullValuePlotter

		if TYPE_CHECKING:
		from arkas.state.temporal_dataframe import TemporalDataFrameState


		class TemporalNullValueOutput(BaseLazyOutput):
		    r"""Output used to analyze the number of null values in a DataFrame
		    that has a temporal column.

		    The output stores a single state and hands it to the content
		    generator and to the plotter it creates. The evaluator it
		    creates is built without any argument.

		    Args:
		        state: The state containing the DataFrame to analyze.

		    Example usage:

		    ```pycon

		    >>> from datetime import datetime, timezone
		    >>> import polars as pl
		    >>> from arkas.output import TemporalNullValueOutput
		    >>> from arkas.state import TemporalDataFrameState
		    >>> frame = pl.DataFrame(
		    ...     {
		    ...         "col1": [0, 1, 1, 0],
		    ...         "col2": [0, 1, 0, 1],
		    ...         "col3": [1, 0, 0, 0],
		    ...         "datetime": [
		    ...             datetime(year=2020, month=1, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=2, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=3, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=4, day=3, tzinfo=timezone.utc),
		    ...         ],
		    ...     },
		    ...     schema={
		    ...         "col1": pl.Int64,
		    ...         "col2": pl.Int64,
		    ...         "col3": pl.Int64,
		    ...         "datetime": pl.Datetime(time_unit="us", time_zone="UTC"),
		    ...     },
		    ... )
		    >>> output = TemporalNullValueOutput(
		    ...     TemporalDataFrameState(frame, temporal_column="datetime")
		    ... )
		    >>> output
		    TemporalNullValueOutput(
		     (state): TemporalDataFrameState(dataframe=(4, 4), temporal_column='datetime', period=None, figure_config=MatplotlibFigureConfig())
		    )
		    >>> output.get_content_generator()
		    TemporalNullValueContentGenerator(
		     (state): TemporalDataFrameState(dataframe=(4, 4), temporal_column='datetime', period=None, figure_config=MatplotlibFigureConfig())
		    )
		    >>> output.get_evaluator()
		    Evaluator(count=0)
		    >>> output.get_plotter()
		    TemporalNullValuePlotter(
		     (state): TemporalDataFrameState(dataframe=(4, 4), temporal_column='datetime', period=None, figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    def __init__(self, state: TemporalDataFrameState) -> None:
		        self._state = state

		    def __repr__(self) -> str:
		        # Single "(state): ..." entry rendered under the class name.
		        body = repr_indent(repr_mapping({"state": self._state}))
		        name = self.__class__.__qualname__
		        return f"{name}(\n {body}\n)"

		    def __str__(self) -> str:
		        # Same layout as ``__repr__`` but built with the ``str`` helpers.
		        body = str_indent(str_mapping({"state": self._state}))
		        name = self.__class__.__qualname__
		        return f"{name}(\n {body}\n)"

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        # Two outputs are equal when they share the class and their states
		        # compare equal; the NaN policy is delegated to the state.
		        if isinstance(other, self.__class__):
		            return self._state.equal(other._state, equal_nan=equal_nan)
		        return False

		    def _get_content_generator(self) -> TemporalNullValueContentGenerator:
		        return TemporalNullValueContentGenerator(self._state)

		    def _get_evaluator(self) -> Evaluator:
		        # The evaluator is created without any metric.
		        return Evaluator()

		    def _get_plotter(self) -> TemporalNullValuePlotter:
		        return TemporalNullValuePlotter(self._state)

+65

src/arkas/plot/cdf.py

		r"""Contain CDF plotting functions."""

		from __future__ import annotations

		__all__ = ["plot_cdf"]

		from typing import TYPE_CHECKING

		import numpy as np

		from arkas.utils.array import nonnan

		if TYPE_CHECKING:
		from matplotlib.axes import Axes


		def plot_cdf(
		    ax: Axes,
		    array: np.ndarray,
		    nbins: int | None = None,
		    xmin: float = float("-inf"),
		    xmax: float = float("inf"),
		    color: str = "tab:blue",
		    labelcolor: str = "black",
		) -> None:
		    r"""Plot the cumulative distribution function (CDF).

		    The array is flattened and passed through ``nonnan`` first.
		    Nothing is drawn if no value is left. Only the values in
		    ``[xmin, xmax]`` are binned, but the values outside the range
		    still count in the CDF: the values below ``xmin`` are added to
		    every cumulative count and the values above ``xmax`` are part of
		    the normalization.

		    Args:
		        ax: The axes of the matplotlib figure to update.
		        array: The array with the data.
		        nbins: The number of bins to use to plot the CDF. If
		            ``None`` (or ``0``), ``min(1000, number of values)``
		            is used.
		        xmin: The lower bound of the range to plot. It is compared
		            numerically to the values.
		        xmax: The upper bound of the range to plot. It is compared
		            numerically to the values.
		        color: The plot color.
		        labelcolor: The label color.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from matplotlib import pyplot as plt
		    >>> from arkas.plot import plot_cdf
		    >>> fig, ax = plt.subplots()
		    >>> plot_cdf(ax, array=np.arange(101))

		    ```
		    """
		    values = nonnan(array.ravel())
		    if values.size == 0:
		        return
		    if not nbins:
		        nbins = min(1000, values.size)

		    # Number of values that fall outside the plotted range on each side.
		    num_below = values[values < xmin].size
		    num_above = values[values > xmax].size
		    in_range = values[np.logical_and(values >= xmin, values <= xmax)]
		    counts, edges = np.histogram(in_range, bins=nbins)

		    cumulative = np.cumsum(counts) + num_below
		    cdf = cumulative / (np.sum(counts) + num_below + num_above)
		    # Each CDF value is drawn at the center of its bin.
		    centers = [(lower + upper) * 0.5 for lower, upper in zip(edges[:-1], edges[1:])]

		    ax.tick_params(axis="y", labelcolor=labelcolor)
		    ax.plot(centers, cdf, color=color, label="CDF")
		    ax.set_ylim(0.0, 1.0)
		    ax.set_ylabel("cumulative distribution function (CDF)", color=labelcolor)

+301

src/arkas/plot/continuous.py

		r"""Contain plotting functions to analyze continuous values."""

		from __future__ import annotations

		__all__ = [
		"boxplot_continuous",
		"boxplot_continuous_temporal",
		"hist_continuous",
		"hist_continuous2",
		]

		from typing import TYPE_CHECKING

		import numpy as np

		from arkas.plot.cdf import plot_cdf
		from arkas.plot.utils import (
		auto_yscale_continuous,
		axvline_quantile,
		readable_xticklabels,
		)
		from arkas.utils.array import nonnan
		from arkas.utils.range import find_range

		if TYPE_CHECKING:
		from collections.abc import Sequence

		from matplotlib.axes import Axes


		def boxplot_continuous(
		    ax: Axes,
		    array: np.ndarray,
		    xmin: float | str | None = None,
		    xmax: float | str | None = None,
		) -> None:
		    r"""Plot the horizontal boxplot of an array containing continuous
		    values.

		    The array is flattened before plotting. Nothing is drawn if the
		    array is empty.

		    Args:
		        ax: The axes of the matplotlib figure to update.
		        array: The array with the data.
		        xmin: The minimum value of the range or its
		            associated quantile. ``q0.1`` means the 10% quantile.
		            ``0`` is the minimum value and ``1`` is the maximum value.
		            The value is resolved by ``find_range``.
		        xmax: The maximum value of the range or its
		            associated quantile. ``q0.9`` means the 90% quantile.
		            ``0`` is the minimum value and ``1`` is the maximum value.
		            The value is resolved by ``find_range``.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from matplotlib import pyplot as plt
		    >>> from arkas.plot import boxplot_continuous
		    >>> fig, ax = plt.subplots()
		    >>> boxplot_continuous(ax, array=np.arange(101))

		    ```
		    """
		    values = array.ravel()
		    if values.size == 0:
		        return
		    lower, upper = find_range(values, xmin=xmin, xmax=xmax)
		    box_style = {"facecolor": "lightblue"}
		    ax.boxplot(
		        values,
		        notch=True,
		        vert=False,
		        widths=0.7,
		        patch_artist=True,
		        boxprops=box_style,
		    )
		    readable_xticklabels(ax, max_num_xticks=100)
		    # The x-limits are only set for a non-degenerate range.
		    if lower < upper:
		        ax.set_xlim(lower, upper)
		    ax.set_ylabel(" ")


		def boxplot_continuous_temporal(
		    ax: Axes,
		    data: Sequence[np.ndarray],
		    steps: Sequence,
		    ymin: float | str | None = None,
		    ymax: float | str | None = None,
		    yscale: str = "linear",
		) -> None:
		    r"""Plot one vertical boxplot per time step for continuous values.

		    Each item of ``data`` is passed through ``nonnan`` before
		    plotting. Nothing is drawn if ``data`` is empty.

		    Args:
		        ax: The axes of the matplotlib figure to update.
		        data: The sequence of data where each item is a 1-d array with
		            the values of the time step.
		        steps: The sequence time step names.
		        ymin: The minimum value of the range or its
		            associated quantile. ``q0.1`` means the 10% quantile.
		            ``0`` is the minimum value and ``1`` is the maximum value.
		        ymax: The maximum value of the range or its
		            associated quantile. ``q0.9`` means the 90% quantile.
		            ``0`` is the minimum value and ``1`` is the maximum value.
		        yscale: The y-axis scale. If ``'auto'``, the
		            ``'linear'`` or ``'log'/'symlog'`` scale is chosen based
		            on the distribution.

		    Raises:
		        RuntimeError: if ``data`` and ``steps`` have different lengths

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from matplotlib import pyplot as plt
		    >>> from arkas.plot import boxplot_continuous_temporal
		    >>> fig, ax = plt.subplots()
		    >>> rng = np.random.default_rng()
		    >>> data = [rng.standard_normal(1000) for _ in range(10)]
		    >>> boxplot_continuous_temporal(ax, data=data, steps=list(range(len(data))))

		    ```
		    """
		    if len(data) == 0:
		        return
		    if len(data) != len(steps):
		        msg = f"data and steps have different lengths: {len(data):,} vs {len(steps):,}"
		        raise RuntimeError(msg)
		    data = [nonnan(x) for x in data]
		    # ``Axes.boxplot`` places the boxes at 1..N by default, whereas the
		    # tick labels below are placed at 0..N-1. The positions are passed
		    # explicitly so that each label sits under its own box.
		    positions = np.arange(len(steps))
		    ax.boxplot(
		        data,
		        positions=positions,
		        notch=True,
		        vert=True,
		        widths=0.7,
		        patch_artist=True,
		        boxprops={"facecolor": "lightblue"},
		    )
		    # The range and the automatic scale are computed on all the steps.
		    array = np.concatenate(data)
		    ymin, ymax = find_range(array, xmin=ymin, xmax=ymax)
		    if ymin < ymax:
		        ax.set_ylim(ymin, ymax)
		    ax.set_xticks(positions, labels=steps)
		    if yscale == "auto":
		        yscale = auto_yscale_continuous(array=array, nbins=100)
		    ax.set_yscale(yscale)
		    readable_xticklabels(ax)


		def hist_continuous(
		    ax: Axes,
		    array: np.ndarray,
		    nbins: int | None = None,
		    density: bool = False,
		    yscale: str = "linear",
		    xmin: float | str | None = None,
		    xmax: float | str | None = None,
		    cdf: bool = True,
		    quantile: bool = True,
		) -> None:
		    r"""Plot the histogram of an array containing continuous values.

		    The array is flattened before plotting. Nothing is drawn if the
		    array is empty.

		    Args:
		        ax: The axes of the matplotlib figure to update.
		        array: The array with the data.
		        nbins: The number of bins to use to plot.
		        density: If True, draw and return a probability density:
		            each bin will display the bin's raw count divided by the
		            total number of counts and the bin width, so that the area
		            under the histogram integrates to 1.
		        yscale: The y-axis scale. If ``'auto'``, the
		            ``'linear'`` or ``'log'/'symlog'`` scale is chosen based
		            on the distribution.
		        xmin: The minimum value of the range or its
		            associated quantile. ``q0.1`` means the 10% quantile.
		            ``0`` is the minimum value and ``1`` is the maximum value.
		        xmax: The maximum value of the range or its
		            associated quantile. ``q0.9`` means the 90% quantile.
		            ``0`` is the minimum value and ``1`` is the maximum value.
		        cdf: If ``True``, the CDF is added to the plot on a twin
		            y-axis.
		        quantile: If ``True``, the 5% and 95% quantiles are added to
		            the plot when they are strictly inside the plotted range.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from matplotlib import pyplot as plt
		    >>> from arkas.plot import hist_continuous
		    >>> fig, ax = plt.subplots()
		    >>> hist_continuous(ax, array=np.arange(101))

		    ```
		    """
		    values = array.ravel()
		    if values.size == 0:
		        return
		    lower, upper = find_range(values, xmin=xmin, xmax=xmax)
		    ax.hist(
		        values,
		        bins=nbins,
		        range=(lower, upper),
		        color="tab:blue",
		        alpha=0.9,
		        density=density,
		    )
		    readable_xticklabels(ax, max_num_xticks=100)
		    # The x-limits are only set for a non-degenerate range.
		    if lower < upper:
		        ax.set_xlim(lower, upper)

		    if density:
		        ylabel = "density (number of occurrences/total)"
		    else:
		        ylabel = "number of occurrences"
		    ax.set_ylabel(ylabel)

		    if yscale == "auto":
		        yscale = auto_yscale_continuous(array=values, nbins=nbins)
		    ax.set_yscale(yscale)

		    if cdf:
		        # The CDF has its own y-axis so that it does not share the
		        # scale of the histogram.
		        plot_cdf(
		            ax=ax.twinx(),
		            array=values,
		            nbins=nbins,
		            xmin=lower,
		            xmax=upper,
		            color="tab:red",
		            labelcolor="tab:red",
		        )

		    if quantile:
		        q05, q95 = np.quantile(values, q=[0.05, 0.95])
		        if lower < q05 < upper:
		            axvline_quantile(ax, quantile=q05, label="q0.05 ", horizontalalignment="right")
		        if lower < q95 < upper:
		            axvline_quantile(ax, quantile=q95, label=" q0.95", horizontalalignment="left")


		def hist_continuous2(
		    ax: Axes,
		    array1: np.ndarray,
		    array2: np.ndarray,
		    label1: str = "first",
		    label2: str = "second",
		    nbins: int | None = None,
		    density: bool = False,
		    yscale: str = "linear",
		    xmin: float | str | None = None,
		    xmax: float | str | None = None,
		) -> None:
		    r"""Plot the histogram of two arrays to compare the distributions.

		    Both arrays are flattened before plotting, and the range and
		    the automatic scale are computed on their concatenation.
		    Nothing is drawn if both arrays are empty.

		    Args:
		        ax: The axes of the matplotlib figure to update.
		        array1: The first array with the data.
		        array2: The second array with the data.
		        label1: The label associated to the first array.
		        label2: The label associated to the second array.
		        nbins: The number of bins to use to plot.
		        density: If True, draw and return a probability density:
		            each bin will display the bin's raw count divided by the
		            total number of counts and the bin width, so that the area
		            under the histogram integrates to 1.
		        yscale: The y-axis scale. If ``'auto'``, the
		            ``'linear'`` or ``'log'/'symlog'`` scale is chosen based
		            on the distribution.
		        xmin: The minimum value of the range or its
		            associated quantile. ``q0.1`` means the 10% quantile.
		            ``0`` is the minimum value and ``1`` is the maximum value.
		        xmax: The maximum value of the range or its
		            associated quantile. ``q0.9`` means the 90% quantile.
		            ``0`` is the minimum value and ``1`` is the maximum value.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from matplotlib import pyplot as plt
		    >>> from arkas.plot import hist_continuous2
		    >>> fig, ax = plt.subplots()
		    >>> hist_continuous2(ax, array1=np.arange(101), array2=np.arange(51))

		    ```
		    """
		    first, second = array1.ravel(), array2.ravel()
		    merged = np.concatenate([first, second])
		    if merged.size == 0:
		        return
		    lower, upper = find_range(merged, xmin=xmin, xmax=xmax)

		    # Both histograms share the bins and the range so that they can be
		    # compared; only the data, the color and the label differ.
		    series = (
		        (first, "tab:blue", label1),
		        (second, "tab:orange", label2),
		    )
		    for values, color, label in series:
		        ax.hist(
		            values,
		            bins=nbins,
		            range=(lower, upper),
		            color=color,
		            alpha=0.5,
		            label=label,
		            density=density,
		        )

		    readable_xticklabels(ax, max_num_xticks=100)
		    if lower < upper:
		        ax.set_xlim(lower, upper)

		    if density:
		        ylabel = "density (number of occurrences/total)"
		    else:
		        ylabel = "number of occurrences"
		    ax.set_ylabel(ylabel)

		    if yscale == "auto":
		        yscale = auto_yscale_continuous(array=merged, nbins=nbins)
		    ax.set_yscale(yscale)
		    ax.legend()

+195

src/arkas/plot/discrete.py

		r"""Contain plotting functions to analyze discrete values."""

		from __future__ import annotations

		__all__ = ["bar_discrete", "bar_discrete_temporal"]

		from typing import TYPE_CHECKING

		import numpy as np
		from matplotlib import pyplot as plt

		from arkas.plot.utils import auto_yscale_discrete, readable_xticklabels

		if TYPE_CHECKING:
		from collections.abc import Sequence

		from matplotlib.axes import Axes


		def bar_discrete(
		    ax: Axes,
		    names: Sequence,
		    counts: Sequence[int],
		    yscale: str = "auto",
		) -> None:
		    r"""Plot the bar chart of the number of occurrences of discrete
		    values.

		    Nothing is drawn if ``names`` is empty.

		    Args:
		        ax: The axes of the matplotlib figure to update.
		        names: The name of the values to plot. The names are
		            converted to strings to build the tick labels.
		        counts: The number of value occurrences.
		        yscale: The y-axis scale. If ``'auto'``, the
		            ``'linear'`` or ``'log'/'symlog'`` scale is chosen based
		            on the distribution.

		    Example usage:

		    ```pycon

		    >>> from matplotlib import pyplot as plt
		    >>> from arkas.plot import bar_discrete
		    >>> fig, ax = plt.subplots()
		    >>> bar_discrete(ax, names=["a", "b", "c", "d"], counts=[5, 100, 42, 27])

		    ```
		    """
		    num_names = len(names)
		    if num_names == 0:
		        return
		    positions = np.arange(num_names)
		    # With 50 bars or more, the bars use the full width.
		    bar_width = 1 if num_names >= 50 else 0.9
		    ax.bar(positions, counts, width=bar_width, color="tab:blue")
		    if yscale == "auto":
		        yscale = auto_yscale_discrete(min_count=min(counts), max_count=max(counts))
		    ax.set_yscale(yscale)
		    ax.set_xticks(positions, labels=map(str, names))
		    readable_xticklabels(ax, max_num_xticks=100)
		    ax.set_xlim(-0.5, num_names - 0.5)
		    ax.set_xlabel("values")
		    ax.set_ylabel("number of occurrences")


		def bar_discrete_temporal(
		    ax: Axes,
		    counts: np.ndarray,
		    steps: Sequence | None = None,
		    values: Sequence | None = None,
		    proportion: bool = False,
		) -> None:
		    r"""Plot the temporal distribution of discrete values.

		    One stacked bar is drawn per step, with one segment per value.
		    Nothing is drawn if ``counts`` is empty.

		    Args:
		        ax: The axes of the matplotlib figure to update.
		        counts: A 2-d array that indicates the number of occurrences
		            for each value and time step. The first dimension
		            represents the value and the second dimension
		            represents the steps.
		        steps: The name associated to each step. If ``None``, the
		            step indices are used.
		        values: The name associated to each value. If ``None``, the
		            bars have no label and no legend is shown.
		        proportion: If ``True``, it plots the normalized number of
		            occurrences for each step.

		    Raises:
		        RuntimeError: if the length of ``values`` or ``steps`` does
		            not match the shape of ``counts``.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from matplotlib import pyplot as plt
		    >>> from arkas.plot import bar_discrete_temporal
		    >>> fig, ax = plt.subplots()
		    >>> bar_discrete_temporal(
		    ...     ax, counts=np.ones((5, 20)), values=list(range(5)), steps=list(range(20))
		    ... )

		    ```
		    """
		    if counts.size == 0:
		        return
		    num_values, num_steps = counts.shape
		    values = _prepare_values_bar_discrete_temporal(values=values, num_values=num_values)
		    steps = _prepare_steps_bar_discrete_temporal(steps=steps, num_steps=num_steps)
		    counts = _prepare_counts_bar_discrete_temporal(counts=counts, proportion=proportion)

		    x = np.arange(num_steps, dtype=np.int64)
		    # Running height of the stack for each step.
		    bottom = np.zeros(num_steps, dtype=counts.dtype)
		    width = 0.9 if num_steps < 50 else 1
		    my_cmap = plt.get_cmap("viridis")
		    for i, (count, value) in enumerate(zip(counts, values)):
		        ax.bar(x, count, label=value, bottom=bottom, width=width, color=my_cmap(i / num_values))
		        bottom += count

		    # The legend is only shown when there are labels and few enough of
		    # them to stay readable.
		    num_valid_values = sum(value is not None for value in values)
		    if 0 < num_valid_values <= 10:
		        ax.legend()
		    ax.set_xticks(x, labels=steps)
		    readable_xticklabels(ax, max_num_xticks=100)
		    ax.set_xlim(-0.5, num_steps - 0.5)
		    # The steps are on the x-axis. The label was previously set on the
		    # y-axis and immediately overwritten by the next call.
		    ax.set_xlabel("steps")
		    ax.set_ylabel("proportion" if proportion else "number of occurrences")


		def _prepare_values_bar_discrete_temporal(values: Sequence | None, num_values: int) -> list:
		    r"""Return the value names as a list of the expected length.

		    This function was designed to be used in ``bar_discrete_temporal``.

		    Args:
		        values: The sequence of values.
		        num_values: The expected number of values.

		    Returns:
		        A new list with the values. If ``values`` is ``None``, a
		            list filled with ``num_values`` ``None`` is returned.

		    Raises:
		        RuntimeError: if the length of ``values`` does not match with
		            ``num_values``.
		    """
		    if values is None:
		        return [None for _ in range(num_values)]
		    if len(values) == num_values:
		        return list(values)
		    msg = (
		        f"values length ({len(values):,}) do not match with the count matrix "
		        f"first dimension ({num_values:,})"
		    )
		    raise RuntimeError(msg)


		def _prepare_steps_bar_discrete_temporal(steps: Sequence | None, num_steps: int) -> list:
		    r"""Return the step names as a list of the expected length.

		    This function was designed to be used in ``bar_discrete_temporal``.

		    Args:
		        steps: The sequence of steps.
		        num_steps: The expected number of steps.

		    Returns:
		        A new list with the steps. If ``steps`` is ``None``, the
		            step indices ``0, 1, ..., num_steps - 1`` are returned.

		    Raises:
		        RuntimeError: if the length of ``steps`` does not match with
		            ``num_steps``.
		    """
		    if steps is None:
		        return list(range(num_steps))
		    if len(steps) == num_steps:
		        return list(steps)
		    msg = (
		        f"steps length ({len(steps):,}) do not match with the count matrix "
		        f"second dimension ({num_steps:,})"
		    )
		    raise RuntimeError(msg)


		def _prepare_counts_bar_discrete_temporal(counts: np.ndarray, proportion: bool) -> np.ndarray:
		    r"""Prepare the count matrix.

		    This function was designed to be used in ``bar_discrete_temporal``.

		    Args:
		        counts: A 2-d array that indicates the number of occurrences
		            for each value and time step. The first dimension
		            represents the value and the second dimension
		            represents the steps.
		        proportion: If ``True``, each column of the count matrix is
		            divided by its sum, so that it contains the proportion
		            of each value for the step.

		    Returns:
		        The input count matrix if ``proportion`` is ``False``,
		            otherwise a new normalized matrix.
		    """
		    if proportion:
		        totals = counts.sum(axis=0)
		        # The totals are clipped to 1 so that a step without any
		        # occurrence gives 0 instead of a division by zero.
		        return counts / np.clip(totals, a_min=1, a_max=None)
		    return counts

+76

src/arkas/plot/null_temporal.py

		r"""Contain functionalities to plot the temporal distribution of the
		number of missing values."""

		from __future__ import annotations

		__all__ = ["plot_null_temporal"]

		from typing import TYPE_CHECKING

		import numpy as np

		if TYPE_CHECKING:
		from collections.abc import Sequence

		from matplotlib.axes import Axes


		def plot_null_temporal(ax: Axes, nulls: Sequence, totals: Sequence, labels: Sequence) -> None:
		    r"""Plot the temporal distribution of the number of missing values.

		    ``nulls``, ``totals``, and ``labels`` must have the same length
		    and have the same order. The number of null and total values
		    are drawn as bars, and the ratio ``nulls / totals`` is drawn as
		    a line on a twin y-axis. Nothing is drawn if the inputs are
		    empty.

		    Args:
		        ax: The Axes object that encapsulates all the elements of an
		            individual (sub-)plot in a figure.
		        nulls: The number of null values for each temporal period.
		        totals: The number of total values for each temporal period.
		        labels: The labels for each temporal period.

		    Raises:
		        ValueError: if ``nulls``, ``totals``, and ``labels`` have
		            different lengths.

		    Example usage:

		    ```pycon

		    >>> from matplotlib import pyplot as plt
		    >>> from arkas.plot import plot_null_temporal
		    >>> fig, ax = plt.subplots()
		    >>> plot_null_temporal(
		    ...     ax, nulls=[1, 2, 3, 4], totals=[10, 12, 14, 16], labels=["jan", "feb", "mar", "apr"]
		    ... )

		    ```
		    """
		    if len(nulls) != len(totals):
		        msg = f"nulls ({len(nulls):,}) and totals ({len(totals):,}) have different lengths"
		        raise ValueError(msg)
		    if len(labels) != len(totals):
		        msg = f"nulls ({len(nulls):,}) and labels ({len(labels):,}) have different lengths"
		        raise ValueError(msg)
		    if len(nulls) == 0:
		        return

		    labels = [str(label) for label in labels]
		    nulls = np.asarray(nulls)
		    totals = np.asarray(totals)

		    color = "tab:blue"
		    x = np.arange(len(labels))
		    ax.set_ylabel("number of null/total values", color=color)
		    ax.tick_params(axis="y", labelcolor=color)
		    ax.bar(x=x, height=totals, color="tab:cyan", alpha=0.5, label="total")
		    ax.bar(x=x, height=nulls, color=color, alpha=0.8, label="null")
		    ax.legend()

		    # The ratio is undefined for a period without any value: NaN is
		    # used there instead of dividing by zero, which would raise a
		    # RuntimeWarning and give inf/nan values.
		    ratio = np.full(nulls.shape, np.nan, dtype=float)
		    np.divide(nulls, totals, out=ratio, where=totals != 0)

		    ax2 = ax.twinx()
		    color = "black"
		    # NOTE: the plotted values are ratios in [0, 1] even though the
		    # axis label says "percentage".
		    ax2.set_ylabel("percentage", color=color)
		    ax2.tick_params(axis="y", labelcolor=color)
		    ax2.plot(x, ratio, "o-", color=color)

		    ax.set_xticks(x, labels=labels)
		    ax.set_xlim(-0.5, len(labels) - 0.5)

+153

src/arkas/plotter/continuous_series.py

		r"""Contain the implementation of a plotter to analyze a Series with
		continuous values."""

		from __future__ import annotations

		__all__ = ["BaseFigureCreator", "ContinuousSeriesPlotter", "MatplotlibFigureCreator"]

		from abc import ABC, abstractmethod
		from typing import TYPE_CHECKING, Any

		import matplotlib.pyplot as plt
		from coola.utils import repr_indent, repr_mapping, str_indent, str_mapping

		from arkas.figure.creator import FigureCreatorRegistry
		from arkas.figure.html import HtmlFigure
		from arkas.figure.matplotlib import MatplotlibFigure, MatplotlibFigureConfig
		from arkas.figure.utils import MISSING_FIGURE_MESSAGE
		from arkas.plot.continuous import hist_continuous
		from arkas.plot.utils.hist import adjust_nbins
		from arkas.plotter.base import BasePlotter
		from arkas.plotter.vanilla import Plotter
		from arkas.utils.array import filter_range, nonnan, to_array
		from arkas.utils.range import find_range

		if TYPE_CHECKING:
		from arkas.figure.base import BaseFigure
		from arkas.state.series import SeriesState


		class BaseFigureCreator(ABC):
		    r"""Define the base class to create a figure with the content of the
		    column.

		    Child classes must implement ``create``.
		    """

		    @abstractmethod
		    def create(self, state: SeriesState) -> BaseFigure:
		        r"""Create a figure with the content of the column.

		        Args:
		            state: The state containing the Series to analyze.

		        Returns:
		            The generated figure.

		        Example usage:

		        ```pycon

		        >>> import polars as pl
		        >>> from arkas.plotter.continuous_series import MatplotlibFigureCreator
		        >>> from arkas.state import SeriesState
		        >>> creator = MatplotlibFigureCreator()
		        >>> fig = creator.create(SeriesState(pl.Series("col1", [1, 2, 3, 4, 5, 6, 7])))

		        ```
		        """


		class MatplotlibFigureCreator(BaseFigureCreator):
		    r"""Create a matplotlib figure with the distribution of the values
		    of a column.

		    Example usage:

		    ```pycon

		    >>> import polars as pl
		    >>> from arkas.plotter.continuous_series import MatplotlibFigureCreator
		    >>> from arkas.state import SeriesState
		    >>> creator = MatplotlibFigureCreator()
		    >>> fig = creator.create(SeriesState(pl.Series("col1", [1, 2, 3, 4, 5, 6, 7])))

		    ```
		    """

		    def __repr__(self) -> str:
		        name = self.__class__.__qualname__
		        return f"{name}()"

		    def create(self, state: SeriesState) -> BaseFigure:
		        values = nonnan(to_array(state.series))
		        # A placeholder figure is returned if there is nothing to plot.
		        if values.size == 0:
		            return HtmlFigure(MISSING_FIGURE_MESSAGE)

		        config = state.figure_config
		        fig, ax = plt.subplots(**config.get_arg("init", {}))
		        lower, upper = find_range(
		            values,
		            xmin=config.get_arg("xmin"),
		            xmax=config.get_arg("xmax"),
		        )
		        # The number of bins is adjusted on the values that are inside
		        # the plotted range.
		        requested_nbins = config.get_arg("nbins")
		        values_in_range = filter_range(values, xmin=lower, xmax=upper)
		        nbins = adjust_nbins(nbins=requested_nbins, array=values_in_range)
		        hist_continuous(
		            ax=ax,
		            array=values,
		            nbins=nbins,
		            xmin=lower,
		            xmax=upper,
		            yscale=config.get_arg("yscale", default="linear"),
		        )
		        ax.set_title(f"data distribution for column {state.series.name!r}")
		        fig.tight_layout()
		        return MatplotlibFigure(fig)


		class ContinuousSeriesPlotter(BasePlotter):
		    r"""Plotter that analyzes a column with continuous values.

		    The figure is created by the figure creator registered for the
		    backend of the state's figure configuration.

		    Args:
		        state: The state containing the Series to analyze.

		    Example usage:

		    ```pycon

		    >>> import polars as pl
		    >>> from arkas.plotter import ContinuousSeriesPlotter
		    >>> from arkas.state import SeriesState
		    >>> plotter = ContinuousSeriesPlotter(SeriesState(pl.Series("col1", [1, 2, 3, 4, 5, 6, 7])))
		    >>> plotter
		    ContinuousSeriesPlotter(
		     (state): SeriesState(name='col1', values=(7,), figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    # Maps a figure backend to the creator that builds its figures.
		    registry = FigureCreatorRegistry[BaseFigureCreator](
		        {MatplotlibFigureConfig.backend(): MatplotlibFigureCreator()}
		    )

		    def __init__(self, state: SeriesState) -> None:
		        self._state = state

		    def __repr__(self) -> str:
		        # Single "(state): ..." entry rendered under the class name.
		        body = repr_indent(repr_mapping({"state": self._state}))
		        name = self.__class__.__qualname__
		        return f"{name}(\n {body}\n)"

		    def __str__(self) -> str:
		        # Same layout as ``__repr__`` but built with the ``str`` helpers.
		        body = str_indent(str_mapping({"state": self._state}))
		        name = self.__class__.__qualname__
		        return f"{name}(\n {body}\n)"

		    def compute(self) -> Plotter:
		        # The figures are generated and wrapped in a ``Plotter``.
		        figures = self.plot()
		        return Plotter(figures)

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        # Two plotters are equal when they share the class and their
		        # states compare equal.
		        if isinstance(other, self.__class__):
		            return self._state.equal(other._state, equal_nan=equal_nan)
		        return False

		    def plot(self, prefix: str = "", suffix: str = "") -> dict:
		        creator = self.registry.find_creator(self._state.figure_config.backend())
		        figure = creator.create(self._state)
		        return {f"{prefix}continuous_histogram{suffix}": figure}

+161

src/arkas/plotter/null_value.py

		r"""Contain the implementation of a plotter that plots the number of
		null values for each column."""

		from __future__ import annotations

		__all__ = ["BaseFigureCreator", "MatplotlibFigureCreator", "NullValuePlotter"]

		from abc import ABC, abstractmethod
		from typing import TYPE_CHECKING, Any

		import matplotlib.pyplot as plt
		from coola.utils import repr_indent, repr_mapping, str_indent, str_mapping

		from arkas.figure.creator import FigureCreatorRegistry
		from arkas.figure.html import HtmlFigure
		from arkas.figure.matplotlib import MatplotlibFigure, MatplotlibFigureConfig
		from arkas.figure.utils import MISSING_FIGURE_MESSAGE
		from arkas.plot.utils import readable_xticklabels
		from arkas.plotter.base import BasePlotter
		from arkas.plotter.vanilla import Plotter

		if TYPE_CHECKING:
		from arkas.figure.base import BaseFigure
		from arkas.state.null_value import NullValueState


		class BaseFigureCreator(ABC):
		    r"""Define the base class to create a bar plot figure with the
		    number of null values for each column.

		    Child classes must implement ``create``.
		    """

		    @abstractmethod
		    def create(self, state: NullValueState) -> BaseFigure:
		        r"""Create a bar plot figure with the number of null values for
		        each column.

		        Args:
		            state: The state containing the number of null values per
		                column.

		        Returns:
		            The generated figure.

		        Example usage:

		        ```pycon

		        >>> import numpy as np
		        >>> from arkas.plotter.null_value import MatplotlibFigureCreator
		        >>> from arkas.state import NullValueState
		        >>> creator = MatplotlibFigureCreator()
		        >>> fig = creator.create(
		        ...     NullValueState(
		        ...         null_count=np.array([0, 1, 2]),
		        ...         total_count=np.array([5, 5, 5]),
		        ...         columns=["col1", "col2", "col3"],
		        ...     )
		        ... )

		        ```
		        """


		class MatplotlibFigureCreator(BaseFigureCreator):
		    r"""Create a matplotlib bar plot with the number of null values for
		    each column.

		    The columns are sorted by number of null values, then by name.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from arkas.plotter.null_value import MatplotlibFigureCreator
		    >>> from arkas.state import NullValueState
		    >>> creator = MatplotlibFigureCreator()
		    >>> fig = creator.create(
		    ...     NullValueState(
		    ...         null_count=np.array([0, 1, 2]),
		    ...         total_count=np.array([5, 5, 5]),
		    ...         columns=["col1", "col2", "col3"],
		    ...     )
		    ... )

		    ```
		    """

		    def __repr__(self) -> str:
		        name = self.__class__.__qualname__
		        return f"{name}()"

		    def create(self, state: NullValueState) -> BaseFigure:
		        # A placeholder figure is returned if there is no column.
		        if state.null_count.shape[0] == 0:
		            return HtmlFigure(MISSING_FIGURE_MESSAGE)

		        fig, ax = plt.subplots(**state.figure_config.get_arg("init", {}))

		        ordered = state.to_dataframe().sort(by=["null", "column"])
		        column_names = ordered["column"].to_list()
		        null_counts = ordered["null"].to_numpy()
		        ax.bar(x=column_names, height=null_counts, color="tab:blue")
		        ax.set_xlim(-0.5, len(state.columns) - 0.5)
		        readable_xticklabels(ax, max_num_xticks=100)
		        ax.set_xlabel("column")
		        ax.set_ylabel("number of null values")
		        ax.set_title("number of null values per column")
		        fig.tight_layout()
		        return MatplotlibFigure(fig)


		class NullValuePlotter(BasePlotter):
		    r"""Plot the number of null values of each column.

		    The figure is built by the creator registered in ``registry`` for
		    the backend of the state's figure configuration.

		    Args:
		        state: The state holding the number of null values of each
		            column.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from arkas.plotter import NullValuePlotter
		    >>> from arkas.state import NullValueState
		    >>> plotter = NullValuePlotter(
		    ...     NullValueState(
		    ...         null_count=np.array([0, 1, 2]),
		    ...         total_count=np.array([5, 5, 5]),
		    ...         columns=["col1", "col2", "col3"],
		    ...     )
		    ... )
		    >>> plotter
		    NullValuePlotter(
		     (state): NullValueState(num_columns=3, figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    # Maps a figure backend to the creator that knows how to draw with it.
		    registry = FigureCreatorRegistry[BaseFigureCreator](
		        {MatplotlibFigureConfig.backend(): MatplotlibFigureCreator()}
		    )

		    def __init__(self, state: NullValueState) -> None:
		        self._state = state

		    def __repr__(self) -> str:
		        body = repr_indent(repr_mapping({"state": self._state}))
		        return f"{self.__class__.__qualname__}(\n {body}\n)"

		    def __str__(self) -> str:
		        body = str_indent(str_mapping({"state": self._state}))
		        return f"{self.__class__.__qualname__}(\n {body}\n)"

		    def compute(self) -> Plotter:
		        figures = self.plot()
		        return Plotter(figures)

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        # Only plotters of the same class can be equal; the comparison
		        # itself is delegated to the states.
		        if isinstance(other, self.__class__):
		            return self._state.equal(other._state, equal_nan=equal_nan)
		        return False

		    def plot(self, prefix: str = "", suffix: str = "") -> dict:
		        backend = self._state.figure_config.backend()
		        creator = self.registry.find_creator(backend)
		        return {f"{prefix}null_values{suffix}": creator.create(self._state)}

+209

src/arkas/plotter/temporal_null_value.py

		r"""Contain the implementation of a plotter that shows the null values of a DataFrame over time."""

		from __future__ import annotations

		__all__ = ["BaseFigureCreator", "MatplotlibFigureCreator", "TemporalNullValuePlotter"]

		from abc import ABC, abstractmethod
		from typing import TYPE_CHECKING, Any

		import matplotlib.pyplot as plt
		from coola.utils import repr_indent, repr_mapping, str_indent, str_mapping
		from grizz.utils.null import compute_temporal_null_count

		from arkas.figure.creator import FigureCreatorRegistry
		from arkas.figure.html import HtmlFigure
		from arkas.figure.matplotlib import MatplotlibFigure, MatplotlibFigureConfig
		from arkas.figure.utils import MISSING_FIGURE_MESSAGE
		from arkas.plot import plot_null_temporal
		from arkas.plot.utils import readable_xticklabels
		from arkas.plotter.base import BasePlotter
		from arkas.plotter.vanilla import Plotter

		if TYPE_CHECKING:

		from arkas.figure.base import BaseFigure
		from arkas.state.temporal_dataframe import TemporalDataFrameState


		class BaseFigureCreator(ABC):
		    r"""Define the interface of the objects that build a figure
		    about the null values of a DataFrame with a temporal column.

		    A concrete creator implements ``create`` for one figure backend.
		    """

		    @abstractmethod
		    def create(self, state: TemporalDataFrameState) -> BaseFigure:
		        r"""Build the figure from the DataFrame stored in the state.

		        Args:
		            state: The state holding the DataFrame to analyze, the
		                name of its temporal column and the period.

		        Returns:
		            The figure built from the state.

		        Example usage:

		        ```pycon

		        >>> from datetime import datetime, timezone
		        >>> import polars as pl
		        >>> from arkas.plotter.temporal_null_value import MatplotlibFigureCreator
		        >>> from arkas.state import TemporalDataFrameState
		        >>> creator = MatplotlibFigureCreator()
		        >>> frame = pl.DataFrame(
		        ...     {
		        ...         "col1": [0, 1, 1, 0],
		        ...         "col2": [0, 1, 0, 1],
		        ...         "col3": [1, 0, 0, 0],
		        ...         "datetime": [
		        ...             datetime(year=2020, month=1, day=3, tzinfo=timezone.utc),
		        ...             datetime(year=2020, month=2, day=3, tzinfo=timezone.utc),
		        ...             datetime(year=2020, month=3, day=3, tzinfo=timezone.utc),
		        ...             datetime(year=2020, month=4, day=3, tzinfo=timezone.utc),
		        ...         ],
		        ...     },
		        ...     schema={
		        ...         "col1": pl.Int64,
		        ...         "col2": pl.Int64,
		        ...         "col3": pl.Int64,
		        ...         "datetime": pl.Datetime(time_unit="us", time_zone="UTC"),
		        ...     },
		        ... )
		        >>> fig = creator.create(
		        ...     TemporalDataFrameState(frame, temporal_column="datetime", period="1d")
		        ... )

		        ```
		        """


		class MatplotlibFigureCreator(BaseFigureCreator):
		    r"""Draw the null values of a DataFrame with a temporal column as
		    a matplotlib figure.

		    Every column except the temporal one is analyzed. The counts come
		    from ``compute_temporal_null_count`` and are drawn with
		    ``plot_null_temporal``. When the DataFrame has no row, an
		    ``HtmlFigure`` holding ``MISSING_FIGURE_MESSAGE`` is returned
		    instead of a plot.

		    Example usage:

		    ```pycon

		    >>> from datetime import datetime, timezone
		    >>> import polars as pl
		    >>> from arkas.plotter.temporal_null_value import MatplotlibFigureCreator
		    >>> from arkas.state import TemporalDataFrameState
		    >>> creator = MatplotlibFigureCreator()
		    >>> frame = pl.DataFrame(
		    ...     {
		    ...         "col1": [0, 1, 1, 0],
		    ...         "col2": [0, 1, 0, 1],
		    ...         "col3": [1, 0, 0, 0],
		    ...         "datetime": [
		    ...             datetime(year=2020, month=1, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=2, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=3, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=4, day=3, tzinfo=timezone.utc),
		    ...         ],
		    ...     },
		    ...     schema={
		    ...         "col1": pl.Int64,
		    ...         "col2": pl.Int64,
		    ...         "col3": pl.Int64,
		    ...         "datetime": pl.Datetime(time_unit="us", time_zone="UTC"),
		    ...     },
		    ... )
		    >>> fig = creator.create(
		    ...     TemporalDataFrameState(frame, temporal_column="datetime", period="1d")
		    ... )

		    ```
		    """

		    def __repr__(self) -> str:
		        name = self.__class__.__qualname__
		        return f"{name}()"

		    def create(self, state: TemporalDataFrameState) -> BaseFigure:
		        # Nothing can be drawn without rows: return the placeholder.
		        if not state.dataframe.shape[0]:
		            return HtmlFigure(MISSING_FIGURE_MESSAGE)

		        # The "init" entry of the figure config is forwarded to matplotlib.
		        init_kwargs = state.figure_config.get_arg("init", {})
		        figure, axes = plt.subplots(**init_kwargs)

		        # The temporal column is the time axis, not a column to analyze.
		        analyzed = list(state.dataframe.columns)
		        analyzed.remove(state.temporal_column)
		        nulls, totals, labels = compute_temporal_null_count(
		            frame=state.dataframe,
		            columns=analyzed,
		            temporal_column=state.temporal_column,
		            period=state.period,
		        )
		        plot_null_temporal(ax=axes, labels=labels, nulls=nulls, totals=totals)
		        readable_xticklabels(axes, max_num_xticks=100)

		        figure.tight_layout()
		        return MatplotlibFigure(figure)


		class TemporalNullValuePlotter(BasePlotter):
		    r"""Plot the null values of a DataFrame with a temporal column.

		    The figure is built by the creator registered in ``registry`` for
		    the backend of the state's figure configuration.

		    Args:
		        state: The state holding the DataFrame to analyze, the name
		            of its temporal column and the period.

		    Example usage:

		    ```pycon

		    >>> from datetime import datetime, timezone
		    >>> import polars as pl
		    >>> from arkas.plotter import TemporalNullValuePlotter
		    >>> from arkas.state import TemporalDataFrameState
		    >>> frame = pl.DataFrame(
		    ...     {
		    ...         "col1": [0, 1, 1, 0],
		    ...         "col2": [0, 1, 0, 1],
		    ...         "col3": [1, 0, 0, 0],
		    ...         "datetime": [
		    ...             datetime(year=2020, month=1, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=2, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=3, day=3, tzinfo=timezone.utc),
		    ...             datetime(year=2020, month=4, day=3, tzinfo=timezone.utc),
		    ...         ],
		    ...     },
		    ...     schema={
		    ...         "col1": pl.Int64,
		    ...         "col2": pl.Int64,
		    ...         "col3": pl.Int64,
		    ...         "datetime": pl.Datetime(time_unit="us", time_zone="UTC"),
		    ...     },
		    ... )
		    >>> plotter = TemporalNullValuePlotter(
		    ...     TemporalDataFrameState(frame, temporal_column="datetime", period="1d")
		    ... )
		    >>> plotter
		    TemporalNullValuePlotter(
		     (state): TemporalDataFrameState(dataframe=(4, 4), temporal_column='datetime', period='1d', figure_config=MatplotlibFigureConfig())
		    )

		    ```
		    """

		    # Maps a figure backend to the creator that knows how to draw with it.
		    registry = FigureCreatorRegistry[BaseFigureCreator](
		        {MatplotlibFigureConfig.backend(): MatplotlibFigureCreator()}
		    )

		    def __init__(self, state: TemporalDataFrameState) -> None:
		        self._state = state

		    def __repr__(self) -> str:
		        body = repr_indent(repr_mapping({"state": self._state}))
		        return f"{self.__class__.__qualname__}(\n {body}\n)"

		    def __str__(self) -> str:
		        body = str_indent(str_mapping({"state": self._state}))
		        return f"{self.__class__.__qualname__}(\n {body}\n)"

		    def compute(self) -> Plotter:
		        figures = self.plot()
		        return Plotter(figures)

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        # Only plotters of the same class can be equal; the comparison
		        # itself is delegated to the states.
		        if isinstance(other, self.__class__):
		            return self._state.equal(other._state, equal_nan=equal_nan)
		        return False

		    def plot(self, prefix: str = "", suffix: str = "") -> dict:
		        backend = self._state.figure_config.backend()
		        creator = self.registry.find_creator(backend)
		        return {f"{prefix}temporal_null_value{suffix}": creator.create(self._state)}

+198

src/arkas/state/null_value.py

		r"""Implement a state that contains the number of null values per
		column."""

		from __future__ import annotations

		__all__ = ["NullValueState"]

		import sys
		from typing import TYPE_CHECKING, Any

		import numpy as np
		import polars as pl
		from coola import objects_are_equal
		from coola.utils.format import repr_mapping_line
		from grizz.utils.null import compute_null_count

		from arkas.figure import BaseFigureConfig, get_default_config
		from arkas.state.base import BaseState

		if sys.version_info >= (3, 11):
		from typing import Self
		else: # pragma: no cover
		from typing_extensions import (
		Self, # use backport because it was added in python 3.11
		)

		if TYPE_CHECKING:
		from collections.abc import Sequence


		class NullValueState(BaseState):
		    r"""Store the number of null values of each column.

		    The two count arrays are flattened with ``ravel`` and must have
		    one entry per column name.

		    Args:
		        null_count: The array with the number of null values of each
		            column.
		        total_count: The array with the total number of values of
		            each column.
		        columns: The column names.
		        figure_config: The figure configuration. The configuration
		            returned by ``get_default_config()`` is used when this
		            value is falsy (e.g. ``None``).

		    Raises:
		        ValueError: if the number of column names differs from the
		            size of ``null_count`` or ``total_count``.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from arkas.state import NullValueState
		    >>> state = NullValueState(
		    ...     null_count=np.array([0, 1, 2]),
		    ...     total_count=np.array([5, 5, 5]),
		    ...     columns=["col1", "col2", "col3"],
		    ... )
		    >>> state
		    NullValueState(num_columns=3, figure_config=MatplotlibFigureConfig())

		    ```
		    """

		    def __init__(
		        self,
		        null_count: np.ndarray,
		        total_count: np.ndarray,
		        columns: Sequence[str],
		        figure_config: BaseFigureConfig | None = None,
		    ) -> None:
		        self._null_count = null_count.ravel()
		        self._total_count = total_count.ravel()
		        self._columns = tuple(columns)
		        self._figure_config = figure_config or get_default_config()

		        # Each count array must have exactly one entry per column.
		        num_columns = len(self._columns)
		        for label, counts in (
		            ("null_count", self._null_count),
		            ("total_count", self._total_count),
		        ):
		            if num_columns != counts.shape[0]:
		                msg = (
		                    f"'columns' ({num_columns:,}) and '{label}' "
		                    f"({counts.shape[0]:,}) do not match"
		                )
		                raise ValueError(msg)

		    def __repr__(self) -> str:
		        summary = {
		            "num_columns": self._null_count.shape[0],
		            "figure_config": self._figure_config,
		        }
		        return f"{self.__class__.__qualname__}({repr_mapping_line(summary)})"

		    @property
		    def columns(self) -> tuple[str, ...]:
		        return self._columns

		    @property
		    def null_count(self) -> np.ndarray:
		        return self._null_count

		    @property
		    def total_count(self) -> np.ndarray:
		        return self._total_count

		    @property
		    def figure_config(self) -> BaseFigureConfig | None:
		        return self._figure_config

		    def clone(self, deep: bool = True) -> Self:
		        # A shallow clone shares the arrays and the figure config with
		        # this state; a deep clone copies them.
		        if deep:
		            return self.__class__(
		                null_count=self._null_count.copy(),
		                total_count=self._total_count.copy(),
		                columns=self._columns,
		                figure_config=self._figure_config.clone(),
		            )
		        return self.__class__(
		            null_count=self._null_count,
		            total_count=self._total_count,
		            columns=self._columns,
		            figure_config=self._figure_config,
		        )

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        if not isinstance(other, self.__class__):
		            return False
		        pairs = (
		            (self.null_count, other.null_count),
		            (self.total_count, other.total_count),
		            (self.columns, other.columns),
		            (self.figure_config, other.figure_config),
		        )
		        return all(
		            objects_are_equal(mine, theirs, equal_nan=equal_nan) for mine, theirs in pairs
		        )

		    def to_dataframe(self) -> pl.DataFrame:
		        r"""Export the content of the state to a DataFrame.

		        Returns:
		            A DataFrame with one row per column name and the columns
		            ``column`` (string), ``null`` (int64) and ``total``
		            (int64).

		        Example usage:

		        ```pycon

		        >>> import numpy as np
		        >>> from arkas.state import NullValueState
		        >>> state = NullValueState(
		        ...     null_count=np.array([0, 1, 2]),
		        ...     total_count=np.array([5, 5, 5]),
		        ...     columns=["col1", "col2", "col3"],
		        ... )
		        >>> state.to_dataframe()
		        shape: (3, 3)
		        ┌────────┬──────┬───────┐
		        │ column ┆ null ┆ total │
		        │ ---    ┆ ---  ┆ ---   │
		        │ str    ┆ i64  ┆ i64   │
		        ╞════════╪══════╪═══════╡
		        │ col1   ┆ 0    ┆ 5     │
		        │ col2   ┆ 1    ┆ 5     │
		        │ col3   ┆ 2    ┆ 5     │
		        └────────┴──────┴───────┘

		        ```
		        """
		        data = {
		            "column": self._columns,
		            "null": self._null_count,
		            "total": self._total_count,
		        }
		        schema = {"column": pl.String, "null": pl.Int64, "total": pl.Int64}
		        return pl.DataFrame(data, schema=schema)

		    @classmethod
		    def from_dataframe(
		        cls, dataframe: pl.DataFrame, figure_config: BaseFigureConfig | None = None
		    ) -> NullValueState:
		        r"""Instantiate a ``NullValueState`` object from a DataFrame.

		        The null counts come from ``compute_null_count`` and the
		        total count of every column is the number of rows.

		        Args:
		            dataframe: The DataFrame to summarize.
		            figure_config: An optional figure configuration.

		        Returns:
		            The instantiated ``NullValueState`` object.

		        Example usage:

		        ```pycon

		        >>> import polars as pl
		        >>> from arkas.state import NullValueState
		        >>> frame = pl.DataFrame(
		        ...     {
		        ...         "col1": [0, 1, 1, 0, 0, 1, None],
		        ...         "col2": [0, 1, None, None, 0, 1, 0],
		        ...         "col3": [None, 0, 0, 0, None, 1, None],
		        ...     }
		        ... )
		        >>> state = NullValueState.from_dataframe(frame)
		        >>> state
		        NullValueState(num_columns=3, figure_config=MatplotlibFigureConfig())

		        ```
		        """
		        num_rows, num_columns = dataframe.shape
		        return cls(
		            null_count=compute_null_count(dataframe),
		            total_count=np.full((num_columns,), num_rows),
		            columns=list(dataframe.columns),
		            figure_config=figure_config,
		        )

+102

src/arkas/state/series.py

		r"""Implement the Series state."""

		from __future__ import annotations

		__all__ = ["SeriesState"]

		import sys
		from typing import TYPE_CHECKING, Any

		from coola import objects_are_equal
		from coola.utils.format import repr_mapping_line, str_indent, str_mapping

		from arkas.figure.utils import get_default_config
		from arkas.state.base import BaseState

		if sys.version_info >= (3, 11):
		from typing import Self
		else: # pragma: no cover
		from typing_extensions import (
		Self, # use backport because it was added in python 3.11
		)

		if TYPE_CHECKING:
		import polars as pl

		from arkas.figure.base import BaseFigureConfig


		class SeriesState(BaseState):
		    r"""Store a Series together with a figure configuration.

		    Args:
		        series: The Series to store.
		        figure_config: The figure configuration. The configuration
		            returned by ``get_default_config()`` is used when this
		            value is falsy (e.g. ``None``).

		    Example usage:

		    ```pycon

		    >>> import polars as pl
		    >>> from arkas.state import SeriesState
		    >>> state = SeriesState(pl.Series("col1", [1, 2, 3, 4, 5, 6, 7]))
		    >>> state
		    SeriesState(name='col1', values=(7,), figure_config=MatplotlibFigureConfig())

		    ```
		    """

		    def __init__(
		        self,
		        series: pl.Series,
		        figure_config: BaseFigureConfig | None = None,
		    ) -> None:
		        self._series = series
		        self._figure_config = figure_config or get_default_config()

		    def __repr__(self) -> str:
		        # Only the name and the shape of the Series are shown, not its values.
		        summary = {
		            "name": self._series.name,
		            "values": self._series.shape,
		            "figure_config": self._figure_config,
		        }
		        return f"{self.__class__.__qualname__}({repr_mapping_line(summary)})"

		    def __str__(self) -> str:
		        summary = {
		            "name": self._series.name,
		            "values": self._series.shape,
		            "figure_config": self._figure_config,
		        }
		        args = str_indent(str_mapping(summary))
		        return f"{self.__class__.__qualname__}({args})"

		    @property
		    def series(self) -> pl.Series:
		        return self._series

		    @property
		    def figure_config(self) -> BaseFigureConfig | None:
		        return self._figure_config

		    def clone(self, deep: bool = True) -> Self:
		        # A shallow clone shares the Series and the figure config with
		        # this state; a deep clone clones both.
		        if deep:
		            return self.__class__(
		                series=self._series.clone(),
		                figure_config=self._figure_config.clone(),
		            )
		        return self.__class__(series=self._series, figure_config=self._figure_config)

		    def equal(self, other: Any, equal_nan: bool = False) -> bool:
		        if isinstance(other, self.__class__):
		            return objects_are_equal(self.get_args(), other.get_args(), equal_nan=equal_nan)
		        return False

		    def get_args(self) -> dict:
		        args = {"series": self._series}
		        args["figure_config"] = self._figure_config
		        return args

+62

src/arkas/utils/range.py

		r"""Contain utility functions to manage ranges of values."""

		from __future__ import annotations

		__all__ = ["find_range"]

		import numpy as np


		def find_range(
		    values: np.ndarray,
		    xmin: float | str | None = None,
		    xmax: float | str | None = None,
		) -> tuple[float, float]:
		    r"""Find a valid range of values.

		    Each bound can be given as a number, as a quantile string or
		    left to ``None``. A quantile string is one character followed by
		    the quantile as a number between ``0`` and ``1`` (e.g. ``q0.1``
		    for the 10% quantile). NaN values are ignored when the minimum,
		    the maximum or a quantile is computed.

		    Args:
		        values: The values used to compute the minimum, the maximum
		            and the quantiles.
		        xmin: The lower bound of the range or its associated
		            quantile. ``None`` means the minimum of ``values``.
		        xmax: The upper bound of the range or its associated
		            quantile. ``None`` means the maximum of ``values``.

		    Returns:
		        The range of values in the format ``(min, max)``.
		        It returns ``(nan, nan)`` if the input array is empty.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from arkas.utils.range import find_range
		    >>> data = np.arange(101)
		    >>> find_range(data)
		    (0, 100)
		    >>> find_range(data, xmin=5, xmax=50)
		    (5, 50)
		    >>> find_range(data, xmin="q0.1", xmax="q0.9")
		    (10.0, 90.0)

		    ```
		    """
		    if values.size == 0:
		        return float("nan"), float("nan")

		    # Replace the missing bounds by the extrema of the data.
		    bounds = [
		        np.nanmin(values).item() if xmin is None else xmin,
		        np.nanmax(values).item() if xmax is None else xmax,
		    ]

		    # All the requested quantiles are computed in a single call, in
		    # the same order as the bounds that asked for them.
		    levels = [float(bound[1:]) for bound in bounds if isinstance(bound, str)]
		    computed = iter(np.nanquantile(values, levels))

		    resolved = []
		    for bound in bounds:
		        if isinstance(bound, str):
		            bound = next(computed)
		        # Return native Python numbers instead of NumPy scalars.
		        if isinstance(bound, np.number):
		            bound = bound.item()
		        resolved.append(bound)
		    return (resolved[0], resolved[1])

+197

src/arkas/utils/stats.py

		r"""Contain statistics utility functions."""

		from __future__ import annotations

		__all__ = [
		"compute_statistics_continuous",
		"compute_statistics_continuous_array",
		"compute_statistics_continuous_series",
		"quantile",
		]

		from typing import TYPE_CHECKING

		import numpy as np
		import polars as pl
		from scipy.stats import kurtosis, skew

		from arkas.utils.array import nonnan

		if TYPE_CHECKING:
		from collections.abc import Sequence


		def compute_statistics_continuous(data: np.ndarray | pl.Series) -> dict[str, float]:
		    r"""Return several descriptive statistics for the data with
		    continuous values.

		    A ``polars.Series`` input is handled by
		    ``compute_statistics_continuous_series``; any other input is
		    handled by ``compute_statistics_continuous_array``.

		    Args:
		        data: The data to analyze.

		    Returns:
		        The descriptive statistics for the input data.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from arkas.utils.stats import compute_statistics_continuous
		    >>> compute_statistics_continuous(np.arange(101))
		    {'count': 101, 'nunique': 101, 'num_non_nulls': 101, 'num_nulls': 0,
		     'mean': 50.0, 'std': 29.15...,
		     'skewness': 0.0, 'kurtosis': -1.20..., 'min': 0.0, 'q001': 0.1, 'q01': 1.0,
		     'q05': 5.0, 'q10': 10.0, 'q25': 25.0, 'median': 50.0, 'q75': 75.0, 'q90': 90.0,
		     'q95': 95.0, 'q99': 99.0, 'q999': 99.9, 'max': 100.0, '>0': 100, '<0': 0, '=0': 1}

		    ```
		    """
		    handler = (
		        compute_statistics_continuous_series
		        if isinstance(data, pl.Series)
		        else compute_statistics_continuous_array
		    )
		    return handler(data)


		def compute_statistics_continuous_array(array: np.ndarray) -> dict[str, float]:
		    r"""Return several descriptive statistics for an array with
		    continuous values.

		    The array is flattened and converted to ``float64``. NaN values
		    are counted as nulls, and the mean, standard deviation, skewness,
		    kurtosis, extrema and quantiles are computed on the values kept
		    by ``nonnan``. These statistics are all NaN, and the sign counts
		    are all ``0``, when no value is left.

		    Args:
		        array: The data to analyze.

		    Returns:
		        The descriptive statistics for the input data.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from arkas.utils.stats import compute_statistics_continuous_array
		    >>> compute_statistics_continuous_array(np.arange(101))
		    {'count': 101, 'nunique': 101, 'num_non_nulls': 101, 'num_nulls': 0,
		     'mean': 50.0, 'std': 29.15...,
		     'skewness': 0.0, 'kurtosis': -1.20..., 'min': 0.0, 'q001': 0.1, 'q01': 1.0,
		     'q05': 5.0, 'q10': 10.0, 'q25': 25.0, 'median': 50.0, 'q75': 75.0, 'q90': 90.0,
		     'q95': 95.0, 'q99': 99.0, 'q999': 99.9, 'max': 100.0, '>0': 100, '<0': 0, '=0': 1}

		    ```
		    """
		    flat = array.ravel().astype(np.float64)
		    valid = nonnan(flat)
		    total, num_valid = int(flat.size), int(valid.size)
		    stats = {
		        "count": total,
		        "nunique": int(np.unique(flat).size),
		        "num_non_nulls": num_valid,
		        "num_nulls": total - num_valid,
		    }

		    if num_valid == 0:
		        # No usable value: every numeric statistic is undefined.
		        undefined = (
		            "mean",
		            "std",
		            "skewness",
		            "kurtosis",
		            "min",
		            "q001",
		            "q01",
		            "q05",
		            "q10",
		            "q25",
		            "median",
		            "q75",
		            "q90",
		            "q95",
		            "q99",
		            "q999",
		            "max",
		        )
		        for key in undefined:
		            stats[key] = float("nan")
		        for key in (">0", "<0", "=0"):
		            stats[key] = 0
		        return stats

		    levels = [0.001, 0.01, 0.05, 0.1, 0.25, 0.75, 0.9, 0.95, 0.99, 0.999]
		    quantiles = quantile(valid, q=levels)

		    stats["mean"] = np.mean(valid).item()
		    stats["std"] = np.std(valid).item()
		    stats["skewness"] = float(skew(valid))
		    stats["kurtosis"] = float(kurtosis(valid))
		    stats["min"] = np.min(valid).item()
		    # The median sits between the lower and the upper quantiles so that
		    # the keys stay sorted by increasing quantile.
		    for key, level in (("q001", 0.001), ("q01", 0.01), ("q05", 0.05), ("q10", 0.1), ("q25", 0.25)):
		        stats[key] = quantiles[level]
		    stats["median"] = np.median(valid).item()
		    for key, level in (("q75", 0.75), ("q90", 0.9), ("q95", 0.95), ("q99", 0.99), ("q999", 0.999)):
		        stats[key] = quantiles[level]
		    stats["max"] = np.max(valid).item()
		    # Comparisons with NaN are false, so NaN values are not counted here.
		    stats[">0"] = int((flat > 0).sum())
		    stats["<0"] = int((flat < 0).sum())
		    stats["=0"] = int((flat == 0).sum())
		    return stats


		def compute_statistics_continuous_series(series: pl.Series) -> dict[str, float]:
		    r"""Return several descriptive statistics for a Series with
		    continuous values.

		    The statistics are computed by
		    ``compute_statistics_continuous_array`` on the non-null values.
		    The ``count``, ``nunique``, ``num_nulls`` and ``num_non_nulls``
		    entries are then replaced by values computed on the Series
		    itself.

		    Args:
		        series: The series to analyze.

		    Returns:
		        The descriptive statistics for the input data.

		    Example usage:

		    ```pycon

		    >>> import polars as pl
		    >>> from arkas.utils.stats import compute_statistics_continuous_series
		    >>> compute_statistics_continuous_series(pl.Series(list(range(101))))
		    {'count': 101, 'nunique': 101, 'num_non_nulls': 101, 'num_nulls': 0,
		     'mean': 50.0, 'std': 29.15...,
		     'skewness': 0.0, 'kurtosis': -1.20..., 'min': 0.0, 'q001': 0.1, 'q01': 1.0,
		     'q05': 5.0, 'q10': 10.0, 'q25': 25.0, 'median': 50.0, 'q75': 75.0, 'q90': 90.0,
		     'q95': 95.0, 'q99': 99.0, 'q999': 99.9, 'max': 100.0, '>0': 100, '<0': 0, '=0': 1}

		    ```
		    """
		    total = int(series.shape[0])
		    num_unique = series.n_unique()
		    missing = int(series.null_count())

		    stats = compute_statistics_continuous_array(series.drop_nulls().to_numpy())
		    # The Series-level counts take precedence over the array-level ones.
		    stats.update(
		        {
		            "count": total,
		            "nunique": num_unique,
		            "num_nulls": missing,
		            "num_non_nulls": total - missing,
		        }
		    )
		    return stats


		def quantile(array: np.ndarray, q: Sequence[float]) -> dict[float, float]:
		    r"""Compute the q-th quantiles of the data.

		    The array is flattened and converted to ``float64`` before the
		    quantiles are computed.

		    Args:
		        array: The input data.
		        q: The quantiles to compute. Values must be between 0 and 1
		            inclusive.

		    Returns:
		        A dictionary that maps each requested quantile to its value.
		        Every value is NaN if the input array is empty.

		    Example usage:

		    ```pycon

		    >>> import numpy as np
		    >>> from arkas.utils.stats import quantile
		    >>> quantile(np.arange(101), q=[0.001, 0.01, 0.05, 0.1, 0.25, 0.75, 0.9, 0.95, 0.99, 0.999])
		    {0.001: 0.1, 0.01: 1.0, 0.05: 5.0, 0.1: 10.0, 0.25: 25.0, 0.75: 75.0,
		     0.9: 90.0, 0.95: 95.0, 0.99: 99.0, 0.999: 99.9}

		    ```
		    """
		    flat = array.ravel()
		    result = {}
		    if flat.size == 0:
		        # There is nothing to compute on: every quantile is undefined.
		        for level in q:
		            result[level] = float("nan")
		        return result

		    values = np.quantile(flat.astype(np.float64), q).tolist()
		    for level, value in zip(q, values):
		        result[level] = value
		    return result

+3

-3

PKG-INFO

		@@ -1,4 +0,4 @@
		Metadata-Version: 2.3
		Metadata-Version: 2.1
		Name: arkas
		Version: 0.0.1a9
		Version: 0.0.1a10
		Summary: Library to evaluate ML model performances
		@@ -29,3 +29,3 @@ Home-page: https://github.com/durandtibo/arkas
		Requires-Dist: coola (>=0.8.2,<1.0)
		Requires-Dist: grizz (>=0.2.0a15,<1.0)
		Requires-Dist: grizz (>=0.2.0a16,<1.0)
		Requires-Dist: hya (>=0.2,<1.0) ; extra == "all" or extra == "cli"
		@@ -32,0 +32,0 @@ Requires-Dist: hydra-core (>=1.3,<2.0) ; extra == "all" or extra == "cli"

+2

-2

pyproject.toml

		[tool.poetry]
		name = "arkas"
		version = "0.0.1a9"
		version = "0.0.1a10"
		description = "Library to evaluate ML model performances"
		@@ -35,3 +35,3 @@ readme = "README.md"
		coola = ">=0.8.2,<1.0"
		grizz = ">=0.2.0a15,<1.0"
		grizz = ">=0.2.0a16,<1.0"
		iden = ">=0.1,<1.0"
		@@ -38,0 +38,0 @@ jinja2 = ">=3.0,<4.0"

+6

-0

src/arkas/analyzer/__init__.py

		@@ -14,6 +14,9 @@ r"""Contain DataFrame analyzers."""
		"ContentAnalyzer",
		"ContinuousColumnAnalyzer",
		"DataFrameSummaryAnalyzer",
		"MappingAnalyzer",
		"NullValueAnalyzer",
		"PlotColumnAnalyzer",
		"ScatterColumnAnalyzer",
		"TemporalNullValueAnalyzer",
		"TemporalPlotColumnAnalyzer",
		@@ -31,8 +34,11 @@ "TransformAnalyzer",
		from arkas.analyzer.content import ContentAnalyzer
		from arkas.analyzer.continuous_column import ContinuousColumnAnalyzer
		from arkas.analyzer.frame_summary import DataFrameSummaryAnalyzer
		from arkas.analyzer.lazy import BaseInNLazyAnalyzer, BaseLazyAnalyzer
		from arkas.analyzer.mapping import MappingAnalyzer
		from arkas.analyzer.null_value import NullValueAnalyzer
		from arkas.analyzer.plot_column import PlotColumnAnalyzer
		from arkas.analyzer.scatter_column import ScatterColumnAnalyzer
		from arkas.analyzer.temporal_null_value import TemporalNullValueAnalyzer
		from arkas.analyzer.temporal_plot_column import TemporalPlotColumnAnalyzer
		from arkas.analyzer.transform import TransformAnalyzer

+6

-0

src/arkas/content/__init__.py

		@@ -12,5 +12,8 @@ r"""Contain HTML content generators."""
		"ContentGeneratorDict",
		"ContinuousSeriesContentGenerator",
		"DataFrameSummaryContentGenerator",
		"NullValueContentGenerator",
		"PlotColumnContentGenerator",
		"ScatterColumnContentGenerator",
		"TemporalNullValueContentGenerator",
		"TemporalPlotColumnContentGenerator",
		@@ -23,7 +26,10 @@ ]
		from arkas.content.column_cooccurrence import ColumnCooccurrenceContentGenerator
		from arkas.content.continuous_series import ContinuousSeriesContentGenerator
		from arkas.content.frame_summary import DataFrameSummaryContentGenerator
		from arkas.content.mapping import ContentGeneratorDict
		from arkas.content.null_value import NullValueContentGenerator
		from arkas.content.plot_column import PlotColumnContentGenerator
		from arkas.content.scatter_column import ScatterColumnContentGenerator
		from arkas.content.temporal_null_value import TemporalNullValueContentGenerator
		from arkas.content.temporal_plot_column import TemporalPlotColumnContentGenerator
		from arkas.content.vanilla import ContentGenerator

+2

-2

src/arkas/exporter/base.py

		@@ -50,3 +50,3 @@ r"""Contain the base class to implement an output exporter."""
		(exist_ok): False
		(show_metrics): True
		(show_metrics): False
		)
		@@ -155,3 +155,3 @@
		(exist_ok): False
		(show_metrics): True
		(show_metrics): False
		)
		@@ -158,0 +158,0 @@

+1

-1

src/arkas/exporter/metric.py

		@@ -67,3 +67,3 @@ r"""Contain the implementation of a metric exporter."""
		exist_ok: bool = False,
		show_metrics: bool = True,
		show_metrics: bool = False,
		) -> None:
		@@ -70,0 +70,0 @@ self._path = sanitize_path(path)

+1

-1

src/arkas/exporter/sequential.py

		@@ -68,3 +68,3 @@ r"""Contain an exporter that sequentially calls several exporters."""
		(exist_ok): False
		(show_metrics): True
		(show_metrics): False
		)
		@@ -71,0 +71,0 @@ (1): FigureExporter(

+6

-0

src/arkas/output/__init__.py

		@@ -12,4 +12,6 @@ r"""Contain data outputs."""
		"ContentOutput",
		"ContinuousSeriesOutput",
		"DataFrameSummaryOutput",
		"EmptyOutput",
		"NullValueOutput",
		"Output",
		@@ -19,2 +21,3 @@ "OutputDict",
		"ScatterColumnOutput",
		"TemporalNullValueOutput",
		"TemporalPlotColumnOutput",
		@@ -28,2 +31,3 @@ ]
		from arkas.output.content import ContentOutput
		from arkas.output.continuous_series import ContinuousSeriesOutput
		from arkas.output.empty import EmptyOutput
		@@ -33,5 +37,7 @@ from arkas.output.frame_summary import DataFrameSummaryOutput
		from arkas.output.mapping import OutputDict
		from arkas.output.null_value import NullValueOutput
		from arkas.output.plot_column import PlotColumnOutput
		from arkas.output.scatter_column import ScatterColumnOutput
		from arkas.output.temporal_null_value import TemporalNullValueOutput
		from arkas.output.temporal_plot_column import TemporalPlotColumnOutput
		from arkas.output.vanilla import Output

+21

-1

src/arkas/plot/__init__.py

		@@ -5,5 +5,25 @@ r"""Contain plotting functionalities."""

		__all__ = ["binary_precision_recall_curve", "binary_roc_curve"]
		__all__ = [
		"bar_discrete",
		"bar_discrete_temporal",
		"binary_precision_recall_curve",
		"binary_roc_curve",
		"boxplot_continuous",
		"boxplot_continuous_temporal",
		"hist_continuous",
		"hist_continuous2",
		"plot_cdf",
		"plot_null_temporal",
		]

		from arkas.plot.cdf import plot_cdf
		from arkas.plot.continuous import (
		boxplot_continuous,
		boxplot_continuous_temporal,
		hist_continuous,
		hist_continuous2,
		)
		from arkas.plot.discrete import bar_discrete, bar_discrete_temporal
		from arkas.plot.null_temporal import plot_null_temporal
		from arkas.plot.pr import binary_precision_recall_curve
		from arkas.plot.roc import binary_roc_curve

+6

-0

src/arkas/plotter/__init__.py

		@@ -8,2 +8,4 @@ r"""Contain data plotters."""
		"ColumnCooccurrencePlotter",
		"ContinuousSeriesPlotter",
		"NullValuePlotter",
		"PlotColumnPlotter",
		@@ -13,2 +15,3 @@ "Plotter",
		"ScatterColumnPlotter",
		"TemporalNullValuePlotter",
		"TemporalPlotColumnPlotter",
		@@ -19,6 +22,9 @@ ]
		from arkas.plotter.column_cooccurrence import ColumnCooccurrencePlotter
		from arkas.plotter.continuous_series import ContinuousSeriesPlotter
		from arkas.plotter.mapping import PlotterDict
		from arkas.plotter.null_value import NullValuePlotter
		from arkas.plotter.plot_column import PlotColumnPlotter
		from arkas.plotter.scatter_column import ScatterColumnPlotter
		from arkas.plotter.temporal_null_value import TemporalNullValuePlotter
		from arkas.plotter.temporal_plot_column import TemporalPlotColumnPlotter
		from arkas.plotter.vanilla import Plotter

+4

-3

src/arkas/plotter/plot_column.py

		@@ -35,3 +35,3 @@ r"""Contain the implementation of a DataFrame column plotter."""
		Args:
		state: The state containing the DataFrame to analyze.
		state: The state containing the DataFrame to analyze.

		@@ -95,9 +95,10 @@ Returns:
		fig, ax = plt.subplots(**state.figure_config.get_arg("init", {}))

		for col in state.dataframe:
		ax.plot(col.to_numpy(), label=col.name)

		xmin, xmax = 0, state.dataframe.shape[0] - 1
		if xmin < xmax:
		ax.set_xlim(xmin, xmax)
		if yscale := state.figure_config.get_arg("yscale"):
		ax.set_yscale(yscale)

		ax.legend()
		@@ -104,0 +105,0 @@ fig.tight_layout()

+18

-6

src/arkas/plotter/scatter_column.py

		@@ -19,2 +19,3 @@ r"""Contain the implementation of a DataFrame column plotter."""
		from arkas.plotter.vanilla import Plotter
		from arkas.utils.range import find_range

		@@ -95,8 +96,5 @@ if TYPE_CHECKING:
		color = state.dataframe[state.color].to_numpy() if state.color else None
		s = ax.scatter(
		state.dataframe[state.x].to_numpy(),
		state.dataframe[state.y].to_numpy(),
		c=color,
		label=state.color,
		)
		x = state.dataframe[state.x].to_numpy()
		y = state.dataframe[state.y].to_numpy()
		s = ax.scatter(x=x, y=y, c=color, label=state.color)
		if color is not None:
		@@ -106,2 +104,16 @@ fig.colorbar(s)

		xmin, xmax = find_range(
		x,
		xmin=state.figure_config.get_arg("xmin"),
		xmax=state.figure_config.get_arg("xmax"),
		)
		if xmin < xmax:
		ax.set_xlim(xmin, xmax)
		ymin, ymax = find_range(
		y,
		xmin=state.figure_config.get_arg("ymin"),
		xmax=state.figure_config.get_arg("ymax"),
		)
		if ymin < ymax:
		ax.set_ylim(ymin, ymax)
		ax.set_xlabel(state.x)
		@@ -108,0 +120,0 @@ ax.set_ylabel(state.y)

+9

-0

src/arkas/plotter/temporal_plot_column.py

		@@ -25,2 +25,3 @@ r"""Contain the implementation of a DataFrame column plotter."""
		from arkas.plotter.vanilla import Plotter
		from arkas.utils.range import find_range

		@@ -133,2 +134,10 @@ if TYPE_CHECKING:

		xmin, xmax = find_range(
		time,
		xmin=state.figure_config.get_arg("xmin"),
		xmax=state.figure_config.get_arg("xmax"),
		)
		if xmin < xmax:
		ax.set_xlim(xmin, xmax)
		ax.set_xlabel(state.temporal_column)
		if yscale := state.figure_config.get_arg("yscale"):
		@@ -135,0 +144,0 @@ ax.set_yscale(yscale)

+1

-1

src/arkas/runner/analysis.py

		@@ -68,3 +68,3 @@ r"""Contain a simple runner to analyze data."""
		(exist_ok): False
		(show_metrics): True
		(show_metrics): False
		)
		@@ -71,0 +71,0 @@ (lazy): True

+2

-2

src/arkas/runner/base.py

		@@ -57,3 +57,3 @@ r"""Contain the base class to implement a runner."""
		(exist_ok): False
		(show_metrics): True
		(show_metrics): False
		)
		@@ -212,3 +212,3 @@ (lazy): True
		(exist_ok): False
		(show_metrics): True
		(show_metrics): False
		)
		@@ -215,0 +215,0 @@ (lazy): True

+4

-0

src/arkas/state/__init__.py

		@@ -10,4 +10,6 @@ r"""Contain states."""
		"DataFrameState",
		"NullValueState",
		"PrecisionRecallState",
		"ScatterDataFrameState",
		"SeriesState",
		"TemporalDataFrameState",
		@@ -20,4 +22,6 @@ ]
		from arkas.state.dataframe import DataFrameState
		from arkas.state.null_value import NullValueState
		from arkas.state.precision_recall import PrecisionRecallState
		from arkas.state.scatter_dataframe import ScatterDataFrameState
		from arkas.state.series import SeriesState
		from arkas.state.temporal_dataframe import TemporalDataFrameState

arkas - npm Package Compare versions

Improved metrics