Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a Demo · Install · Sign in
Socket

deepchecks

Package Overview
Dependencies
Maintainers
1
Versions
59
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

deepchecks - npm Package Compare versions

Comparing version
0.18.1
to
0.19.0
+2
-2
deepchecks.egg-info/PKG-INFO
Metadata-Version: 2.1
Name: deepchecks
Version: 0.18.1
Version: 0.19.0
Summary: Package for validating your machine learning model and data

@@ -9,3 +9,3 @@ Home-page: https://github.com/deepchecks/deepchecks

License: UNKNOWN
Download-URL: https://github.com/deepchecks/deepchecks/releases/download/0.18.1/deepchecks-0.18.1.tar.gz
Download-URL: https://github.com/deepchecks/deepchecks/releases/download/0.19.0/deepchecks-0.19.0.tar.gz
Project-URL: Documentation, https://docs.deepchecks.com

@@ -12,0 +12,0 @@ Project-URL: Bug Reports, https://github.com/deepchecks/deepchecks

@@ -35,3 +35,3 @@ pandas<2.2.0,>=1.1.5

ipykernel>=5.3.0
ipywidgets<8,>=7.6.5
ipywidgets>=7.6.5
jupyter-server>=2.7.2

@@ -43,6 +43,8 @@

umap-learn
transformers<4.37.0,>=4.0.0
transformers>=4.0.0
huggingface_hub
sentence_transformers>=3.0.0
[nlp-properties]
fasttext>=0.8.0
fasttext<0.9.3,>=0.8.0

@@ -61,3 +63,3 @@ [nlp:python_version < "3.7"]

opencv-python>=4.5.5.62
albumentations>=1.1.0
albumentations<1.4.0,>=1.1.0
imgaug>=0.4.0

@@ -64,0 +66,0 @@ seaborn>=0.1.0

@@ -22,3 +22,4 @@ # ----------------------------------------------------------------------------

from IPython.core.display import display, display_html
from IPython.core.display import display_html
from IPython.display import display
from ipywidgets import Widget

@@ -109,3 +110,3 @@

elif as_widget is True:
display_html(self.widget_serializer.serialize(
display(self.widget_serializer.serialize(
output_id=unique_id,

@@ -112,0 +113,0 @@ **kwargs

@@ -252,5 +252,7 @@ # ----------------------------------------------------------------------------

children = []
titles = [] # Create a list to store titles
for i, (name, display) in enumerate(item.items()):
tab.set_title(i, name)
# First collect all children and titles
for name, display in item.items():
titles.append(name) # Add title to list
children.append(VBox(children=cls.handle_display(

@@ -263,3 +265,6 @@ display,

# Set all properties at once
tab.children = children
tab.titles = tuple(titles) # Set all titles at once
style = '<style>.jupyter-widgets.widget-tab > .p-TabBar .p-TabBar-tab {min-width: fit-content;}</style>'

@@ -266,0 +271,0 @@ return VBox(children=[

@@ -118,7 +118,8 @@ # ----------------------------------------------------------------------------

])
return Accordion(
accordion = Accordion(
children=[content],
_titles={'0': self.value.name},
selected_index='0'
)
accordion.set_title(0, self.value.name)
return accordion

@@ -163,5 +164,5 @@ def prepare_summary(

children=children,
_titles={'0': title},
selected_index=None
))
accordion.set_title(0, title)
return VBox(children=(

@@ -205,5 +206,5 @@ # by putting `section_anchor` before the results accordion

children=(HTML(value='<p>No outputs to show.</p>'),),
_titles={'0': title},
selected_index=None
))
accordion.set_title(0, title)
else:

@@ -232,5 +233,5 @@ section_id = f'{output_id}-section-{get_random_string()}'

children=(VBox(children=children),),
_titles={'0': title},
selected_index=None
))
accordion.set_title(0, title)

@@ -237,0 +238,0 @@ return VBox(children=(

@@ -27,2 +27,3 @@ # ----------------------------------------------------------------------------

from deepchecks.nlp.text_data import TextData
from deepchecks.nlp.utils.text_properties_models import check_nltk_resource
from deepchecks.utils.numbers import round_sig

@@ -183,6 +184,6 @@ from deepchecks.utils.strings import format_list, format_percent, truncate_string

# Choose tokenizer based on availability of nltk
if nltk.download('punkt', quiet=True):
if check_nltk_resource('punkt_tab'):
tokenize = nltk.word_tokenize
else:
warnings.warn('nltk punkt is not available, using str.split instead to identify individual words. '
warnings.warn('nltk punkt_tab is not available, using str.split instead to identify individual words. '
'Please check your internet connection.')

@@ -189,0 +190,0 @@ tokenize = str.split

@@ -21,3 +21,4 @@ # ----------------------------------------------------------------------------

import requests
from nltk import corpus
from nltk import corpus, data
from nltk import download as nltk_download
from transformers.utils import logging as transformers_logging

@@ -28,2 +29,29 @@

def check_nltk_resource(resource_name: str, resource_path: Optional[str] = None) -> bool:
    """Return True if the named NLTK resource is available, downloading it on demand.

    Parameters
    ----------
    resource_name : str
        NLTK resource to look up (and fetch if absent).
    resource_path : str, optional
        Location to probe for the resource; when omitted,
        'tokenizers/{resource_name}' is used.

    Returns
    -------
    bool
        True when the resource is already present or the download succeeded,
        False otherwise.
    """
    lookup_path = resource_path or f'tokenizers/{resource_name}'
    try:
        data.find(lookup_path)
    except LookupError:
        # Resource missing locally -- fall back to a quiet download attempt.
        try:
            return nltk_download(resource_name, quiet=True)
        except Exception:  # pylint: disable=broad-except
            return False
    return True
def get_create_model_storage(models_storage: Union[pathlib.Path, str, None] = None):

@@ -30,0 +58,0 @@ """Get the models storage directory and create it if needed."""

@@ -25,4 +25,2 @@ # ----------------------------------------------------------------------------

import torch.cuda
from nltk import corpus
from nltk import download as nltk_download
from nltk import sent_tokenize, word_tokenize

@@ -34,3 +32,4 @@ from tqdm import tqdm

from deepchecks.nlp.utils.text import cut_string, hash_text, normalize_text, remove_punctuation
from deepchecks.nlp.utils.text_properties_models import get_cmudict_dict, get_fasttext_model, get_transformer_pipeline
from deepchecks.nlp.utils.text_properties_models import (check_nltk_resource, get_cmudict_dict, get_fasttext_model,
get_transformer_pipeline)
from deepchecks.utils.function import run_available_kwargs

@@ -69,4 +68,4 @@ from deepchecks.utils.strings import SPECIAL_CHARACTERS, format_list

if hash_key not in sentences_cache:
if not nltk_download('punkt', quiet=True):
_warn_if_missing_nltk_dependencies('punkt', 'property')
if not check_nltk_resource('punkt_tab'):
_warn_if_missing_nltk_dependencies('punkt_tab', 'property')
return None

@@ -220,3 +219,6 @@ sentences_cache[hash_key] = sent_tokenize(text)

if len(text_to_use) == 0:
text_to_use = cut_string(sentences[0], MAX_CHARS)
if len(sentences) > 0:
text_to_use = cut_string(sentences[0], MAX_CHARS)
else:
text_to_use = None
text_list_to_predict.append(text_to_use)

@@ -231,7 +233,10 @@ else:

for text in text_list_to_predict:
try:
v = classifier(text)[0]
results.append(output_formatter(v))
except Exception: # pylint: disable=broad-except
if text is None:
results.append(np.nan)
else:
try:
v = classifier(text)[0]
results.append(output_formatter(v))
except Exception: # pylint: disable=broad-except
results.append(np.nan)
return results # Return the results if prediction is successful

@@ -348,6 +353,5 @@

return np.nan
if not nltk_download('punkt', quiet=True):
_warn_if_missing_nltk_dependencies('punkt', 'Lexical Density')
if not check_nltk_resource('punkt_tab'):
_warn_if_missing_nltk_dependencies('punkt_tab', 'Lexical Density')
return np.nan
all_words = _split_to_words_with_cache(text)

@@ -364,6 +368,7 @@ if len(all_words) == 0:

return np.nan
if not nltk_download('averaged_perceptron_tagger', quiet=True):
_warn_if_missing_nltk_dependencies('averaged_perceptron_tagger', 'Unique Noun Count')
if not check_nltk_resource('averaged_perceptron_tagger_eng',
'taggers/averaged_perceptron_tagger_eng'):
_warn_if_missing_nltk_dependencies('averaged_perceptron_tagger_eng',
'Unique Noun Count')
return np.nan
unique_words_with_tags = set(textblob.TextBlob(text).tags)

@@ -383,6 +388,7 @@ return sum(1 for (_, tag) in unique_words_with_tags if tag.startswith('N'))

if cmudict_dict is None:
if not nltk_download('cmudict', quiet=True):
if not check_nltk_resource('cmudict', 'corpora/cmudict'):
_warn_if_missing_nltk_dependencies('cmudict', 'Reading Ease')
return np.nan
cmudict_dict = corpus.cmudict.dict()
else:
cmudict_dict = get_cmudict_dict()
text_sentences = _sample_for_property(text, mode='sentences', limit=DEFAULT_SENTENCE_SAMPLE_SIZE,

@@ -454,10 +460,11 @@ return_as_list=True)

return np.nan
if not nltk_download('punkt', quiet=True):
_warn_if_missing_nltk_dependencies('punkt', 'Unique Syllables Count')
if not check_nltk_resource('punkt_tab'):
_warn_if_missing_nltk_dependencies('punkt_tab', 'Unique Syllables Count')
return np.nan
if cmudict_dict is None:
if not nltk_download('cmudict', quiet=True):
if not check_nltk_resource('cmudict', 'corpora/cmudict'):
_warn_if_missing_nltk_dependencies('cmudict', 'Unique Syllables Count')
return np.nan
cmudict_dict = corpus.cmudict.dict()
else:
cmudict_dict = get_cmudict_dict()

@@ -491,4 +498,4 @@ text = remove_punctuation(text.lower())

return np.nan
if not nltk_download('punkt', quiet=True):
_warn_if_missing_nltk_dependencies('punkt', 'Sentences Count')
if not check_nltk_resource('punkt_tab'):
_warn_if_missing_nltk_dependencies('punkt_tab', 'Sentences Count')
return np.nan

@@ -502,10 +509,11 @@ return len(_split_to_sentences_with_cache(text))

return np.nan
if not nltk_download('punkt', quiet=True):
_warn_if_missing_nltk_dependencies('punkt', 'Average Syllable Length')
if not check_nltk_resource('punkt_tab'):
_warn_if_missing_nltk_dependencies('punkt_tab', 'Average Syllable Length')
return np.nan
if cmudict_dict is None:
if not nltk_download('cmudict', quiet=True):
if not check_nltk_resource('cmudict', 'corpora/cmudict'):
_warn_if_missing_nltk_dependencies('cmudict', 'Average Syllable Length')
return np.nan
cmudict_dict = corpus.cmudict.dict()
else:
cmudict_dict = get_cmudict_dict()
sentence_count = len(_split_to_sentences_with_cache(text))

@@ -557,3 +565,3 @@ text = remove_punctuation(text.lower())

{'name': 'Unique Noun Count', 'method': unique_noun_count, 'output_type': 'numeric'},
)
)

@@ -571,3 +579,3 @@ ALL_PROPERTIES: Tuple[TextProperty, ...] = \

{'name': 'Average Syllable Length', 'method': average_syllable_length, 'output_type': 'numeric'},
) + DEFAULT_PROPERTIES
) + DEFAULT_PROPERTIES

@@ -761,7 +769,8 @@ LONG_RUN_PROPERTIES = ('Toxicity', 'Fluency', 'Formality', 'Unique Noun Count')

if properties_requiring_cmudict:
if not nltk_download('cmudict', quiet=True):
if not check_nltk_resource('cmudict', 'corpora/cmudict'):
_warn_if_missing_nltk_dependencies('cmudict', format_list(properties_requiring_cmudict))
for prop in properties_requiring_cmudict:
calculated_properties[prop] = [np.nan] * len(raw_text)
kwargs['cmudict_dict'] = get_cmudict_dict(use_cache=cache_models)
else:
kwargs['cmudict_dict'] = get_cmudict_dict(use_cache=cache_models)

@@ -768,0 +777,0 @@ if 'Toxicity' in properties_types and 'toxicity_classifier' not in kwargs:

@@ -89,3 +89,3 @@ # ----------------------------------------------------------------------------

return text
if nltk.download('punkt', quiet=True):
if nltk.download('punkt', quiet=True) and nltk.download('punkt_tab', quiet=True):
tokenize = word_tokenize

@@ -92,0 +92,0 @@ else:

@@ -39,4 +39,4 @@ """PPS (Predictive Power Score) module."""

import pandas as pd
from pandas.api.types import (is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, is_numeric_dtype,
is_object_dtype, is_string_dtype, is_timedelta64_dtype)
from pandas.api.types import (is_bool_dtype, is_datetime64_any_dtype, is_numeric_dtype, is_object_dtype,
is_string_dtype, is_timedelta64_dtype)
from sklearn import preprocessing, tree

@@ -228,3 +228,3 @@ from sklearn.metrics import f1_score, mean_absolute_error

def _dtype_represents_categories(series) -> bool:
def _dtype_represents_categories(series: pd.Series) -> bool:
"""Determine if the dtype of the series represents categorical values."""

@@ -235,3 +235,3 @@ return (

or is_string_dtype(series)
or is_categorical_dtype(series)
or isinstance(series.dtype, pd.CategoricalDtype)
)

@@ -238,0 +238,0 @@

@@ -17,3 +17,2 @@ # ----------------------------------------------------------------------------

import pandas as pd
from pandas.api.types import is_categorical_dtype

@@ -109,3 +108,3 @@ from deepchecks.core import CheckResult, ConditionCategory, ConditionResult

column_data = df[column_name]
if is_categorical_dtype(column_data) is True:
if isinstance(column_data.dtype, pd.CategoricalDtype):
# NOTE:

@@ -112,0 +111,0 @@ # 'pandas.Series.value_counts' and 'pandas.Series.apply'

@@ -93,4 +93,6 @@ # ----------------------------------------------------------------------------

_TRAIN_DATA_URL = 'https://drive.google.com/uc?export=download&id=1UWkr1BQlyyUkbsW5hHIFTr-x0evZE3Ie'
_TEST_DATA_URL = 'https://drive.google.com/uc?export=download&id=1lfpWVtDktrnsLUzCN1tkRc1jRbguEz3a'
_TRAIN_DATA_URL = ('https://raw.githubusercontent.com/deepchecks/deepchecks-datasets/'
'8dd24134239b9df5d2a3a13cdce38cc22caaaaf4/airbnb_ref_data.csv')
_TEST_DATA_URL = ('https://raw.githubusercontent.com/deepchecks/deepchecks-datasets/'
'8dd24134239b9df5d2a3a13cdce38cc22caaaaf4/airbnb_prod_data.csv')
_target = 'price'

@@ -97,0 +99,0 @@ _predictions = 'predictions'

@@ -264,2 +264,3 @@ # ----------------------------------------------------------------------------

self.predictions = pd.Series(predictions, index=data.index)
self._estimator_type = 'classifier'

@@ -266,0 +267,0 @@ def predict(self, data: pd.DataFrame) -> np.ndarray:

@@ -325,3 +325,4 @@ # ----------------------------------------------------------------------------

if scorer is not None and dummy_model is not None and label_col is not None:
leaf_data, leaf_labels = leaf_filter.filter(data_for_search, label_col_for_search)
leaf_data = leaf_filter.filter(data_for_search)
leaf_labels = label_col_for_search.loc[leaf_data.index]
leaf_score = scorer.run_on_data_and_label(dummy_model, leaf_data, leaf_labels)

@@ -345,3 +346,3 @@ else: # if no scorer is provided, use the average loss_per_sample of samples in the leaf as the score

grid_searcher = GridSearchCV(DecisionTreeRegressor(random_state=random_state),
scoring=neg_worst_segment_score, param_grid=search_space, n_jobs=-1, cv=3)
scoring=neg_worst_segment_score, param_grid=search_space, n_jobs=1, cv=3)
try:

@@ -348,0 +349,0 @@ grid_searcher.fit(data_for_search, score_per_sample_for_search)

@@ -14,3 +14,3 @@ # ----------------------------------------------------------------------------

from copy import deepcopy
from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, List

@@ -50,16 +50,9 @@ import numpy as np

def filter(self, dataframe: pd.DataFrame, label_col: Optional[pd.Series] = None) -> \
Union[Tuple[pd.DataFrame, pd.Series], pd.DataFrame]:
def filter(self, dataframe: pd.DataFrame) -> pd.DataFrame:
"""Run the filter on given dataframe. Return rows in data frame satisfying the filter properties."""
if label_col is not None:
dataframe['temp_label_col'] = label_col
for func in self.filter_functions:
dataframe = dataframe.loc[func(dataframe)]
return dataframe
if label_col is not None:
return dataframe.drop(columns=['temp_label_col']), dataframe['temp_label_col']
else:
return dataframe
class DeepchecksBaseFilter(DeepchecksFilter):

@@ -219,2 +212,3 @@ """Extend DeepchecksFilter class for feature range based filters.

"""
# pylint: disable=function-redefined,multiple-statements
column = dataset.data[column_name]

@@ -233,6 +227,6 @@ if column_name in dataset.numerical_features:

if end == percentile_values[-1]:
f = lambda df, a=start, b=end: (df[column_name] >= a) & (df[column_name] <= b)
def f(df, a=start, b=end): return (df[column_name] >= a) & (df[column_name] <= b)
label = f'[{format_number(start)} - {format_number(end)}]'
else:
f = lambda df, a=start, b=end: (df[column_name] >= a) & (df[column_name] < b)
def f(df, a=start, b=end): return (df[column_name] >= a) & (df[column_name] < b)
label = f'[{format_number(start)} - {format_number(end)})'

@@ -239,0 +233,0 @@

Metadata-Version: 2.1
Name: deepchecks
Version: 0.18.1
Version: 0.19.0
Summary: Package for validating your machine learning model and data

@@ -9,3 +9,3 @@ Home-page: https://github.com/deepchecks/deepchecks

License: UNKNOWN
Download-URL: https://github.com/deepchecks/deepchecks/releases/download/0.18.1/deepchecks-0.18.1.tar.gz
Download-URL: https://github.com/deepchecks/deepchecks/releases/download/0.19.0/deepchecks-0.19.0.tar.gz
Project-URL: Documentation, https://docs.deepchecks.com

@@ -12,0 +12,0 @@ Project-URL: Bug Reports, https://github.com/deepchecks/deepchecks

@@ -18,7 +18,6 @@ <!--

![pkgVersion](https://img.shields.io/pypi/v/deepchecks)
[![Maintainability](https://api.codeclimate.com/v1/badges/970b11794144139975fa/maintainability)](https://codeclimate.com/github/deepchecks/deepchecks/maintainability)
[![Coverage
Status](https://coveralls.io/repos/github/deepchecks/deepchecks/badge.svg?branch=main)](https://coveralls.io/github/deepchecks/deepchecks?branch=main)
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
[![All Contributors](https://img.shields.io/badge/all_contributors-43-orange.svg?style=flat-round)](#https://github.com/deepchecks/deepchecks/blob/main/CONTRIBUTING.rst)
[![All Contributors](https://img.shields.io/badge/all_contributors-44-orange.svg?style=flat-round)](#https://github.com/deepchecks/deepchecks/blob/main/CONTRIBUTING.rst)
<!-- ALL-CONTRIBUTORS-BADGE:END -->

@@ -369,2 +368,3 @@

<td align="center" valign="top" width="14.28%"><a href="https://www.kaggle.com/rayanaay"><img src="https://avatars.githubusercontent.com/u/55285736?v=4?s=100" width="100px;" alt="AIT ALI YAHIA Rayane"/><br /><sub><b>AIT ALI YAHIA Rayane</b></sub></a><br /><a href="#code-RayanAAY-ops" title="Code">💻</a> <a href="#ideas-RayanAAY-ops" title="Ideas, Planning, & Feedback">🤔</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/chris-santiago"><img src="https://avatars.githubusercontent.com/u/40875112?v=4?s=100" width="100px;" alt="Chris Santiago"/><br /><sub><b>Chris Santiago</b></sub></a><br /><a href="#bug-chris-santiago" title="Bug reports">🐛</a> <a href="#code-chris-santiago" title="Code">💻</a></td>
</tr>

@@ -371,0 +371,0 @@ </tbody>

@@ -24,3 +24,4 @@ twine

catboost
catboost; python_version >= '3.7'
catboost<=1.2.2; python_version < '3.7'
lightgbm

@@ -37,3 +38,4 @@ xgboost<=1.7.5

numpy>=1.18.5
opencv-python>=4.1.2
opencv-python>=4.1.2; python_version > '3.6'
opencv-python<=4.6.0.66; python_version == '3.6'
pillow>=7.1.2; python_version < '3.8'

@@ -40,0 +42,0 @@ pillow>=10.0.1; python_version >= '3.8' # not directly required, pinned by Snyk to avoid a vulnerability

@@ -1,1 +0,1 @@

fasttext>=0.8.0
fasttext>=0.8.0, <0.9.3

@@ -6,3 +6,5 @@ seqeval>=1.0.0

umap-learn
transformers>=4.0.0,<4.37.0
transformers>=4.0.0
huggingface_hub
sentence_transformers>=3.0.0
tiktoken; python_version >= '3.8'

@@ -11,3 +11,3 @@ pandas>=1.1.5,<2.2.0

ipykernel>=5.3.0; python_version >= '3.8'
ipywidgets>=7.6.5,<8; python_version >= '3.8'
ipywidgets>=7.6.5; python_version >= '3.8'

@@ -14,0 +14,0 @@ # google colab requirements (python 3.7)

pytorch-ignite>=0.4.8
opencv-python>=4.5.5.62
albumentations>=1.1.0
albumentations>=1.1.0,<1.4.0
imgaug>=0.4.0

@@ -5,0 +5,0 @@ seaborn>=0.1.0

@@ -1,1 +0,1 @@

0.18.1
0.19.0