drpt
Advanced tools
+1
-1
| Metadata-Version: 2.1 | ||
| Name: drpt | ||
| Version: 0.6.0 | ||
| Version: 0.6.1 | ||
| Summary: Tool for preparing a dataset for publishing by dropping, renaming, scaling, and obfuscating columns defined in a recipe. | ||
@@ -5,0 +5,0 @@ Author-email: Constantinos Xanthopoulos <conx@xanthopoulos.info> |
+2
-2
@@ -7,3 +7,3 @@ [build-system] | ||
| name = "drpt" | ||
| version = "0.6.0" | ||
| version = "0.6.1" | ||
| description = "Tool for preparing a dataset for publishing by dropping, renaming, scaling, and obfuscating columns defined in a recipe." | ||
@@ -40,3 +40,3 @@ readme = "README.md" | ||
| [tool.bumpver] | ||
| current_version = "0.6.0" | ||
| current_version = "0.6.1" | ||
| version_pattern = "MAJOR.MINOR.PATCH[PYTAGNUM]" | ||
@@ -43,0 +43,0 @@ commit_message = "Bump version {old_version} -> {new_version}" |
| Metadata-Version: 2.1 | ||
| Name: drpt | ||
| Version: 0.6.0 | ||
| Version: 0.6.1 | ||
| Summary: Tool for preparing a dataset for publishing by dropping, renaming, scaling, and obfuscating columns defined in a recipe. | ||
@@ -5,0 +5,0 @@ Author-email: Constantinos Xanthopoulos <conx@xanthopoulos.info> |
@@ -1,1 +0,1 @@ | ||
| __version__ = "0.6.0" | ||
| __version__ = "0.6.1" |
+34
-6
@@ -230,5 +230,7 @@ #!/usr/bin/env python3.9 | ||
| for col in self.data.select_dtypes(include="number").columns.tolist(): | ||
| # Skip column if it has been obfuscated already | ||
| if col in self.recipe["actions"].get("obfuscate", []): | ||
| continue | ||
| # Skip column if it matches skip-scaling pattern | ||
| skip_scaling = False | ||
@@ -246,7 +248,31 @@ no_scaling = self.recipe["actions"].get("skip-scaling", []) | ||
| min, max = self.limits[col]["min"], self.limits[col]["max"] | ||
| if pd.isna(min): | ||
| min = col_min | ||
| if pd.isna(max): | ||
| max = col_max | ||
| self._report_log("SCALE_CUSTOM", col, f"[{min},{max}]") | ||
| if min == max: | ||
| self._report_log( | ||
| "WARNING", | ||
| col, | ||
| f"Custom limits are the same: {min}. Reverting to min/max", | ||
| ) | ||
| min, max = col_min, col_max | ||
| self._report_log("SCALE_DEFAULT", col, f"[{min},{max}]") | ||
| else: | ||
| if pd.isna(min): | ||
| min = col_min | ||
| self._report_log( | ||
| "WARNING", | ||
| col, | ||
| "Custom min limit is NaN. Generating from data.", | ||
| ) | ||
| if pd.isna(max): | ||
| max = col_max | ||
| self._report_log( | ||
| "WARNING", | ||
| col, | ||
| "Custom max limit is NaN. Generating from data.", | ||
| ) | ||
| self._report_log("SCALE_CUSTOM", col, f"[{min},{max}]") | ||
| assert max > min, ( | ||
| "Max must be greater than min for column " + col | ||
| ) | ||
| if not self.dry_run: | ||
@@ -259,3 +285,5 @@ min_max_scale_limit_cols.append(col) | ||
| dtype=pd.Int64Dtype, na_value=np.nan | ||
| ) | ||
| ), | ||
| min, | ||
| max, | ||
| ) | ||
@@ -262,0 +290,0 @@ ) |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
44805
3.39%447
6.18%