fedops
Advanced tools
| # fedops/utils/fedco/datasetting.py | ||
| from __future__ import annotations | ||
| from typing import Dict, List, Tuple | ||
| import numpy as np | ||
| # -------- Public API -------- | ||
| def build_parts( | ||
| targets: np.ndarray, | ||
| num_clients: int, | ||
| mode_str: str, | ||
| seed: int = 42, | ||
| ) -> List[List[int]]: | ||
| """ | ||
| Build client index lists for Non-IID partitioning. | ||
| Args: | ||
| targets: 1D numpy array of integer class labels (length = N samples). | ||
| num_clients: number of clients to split into (>=1). | ||
| mode_str: one of | ||
| - "iid" | ||
| - "dirichlet:<alpha>" e.g., "dirichlet:0.3" | ||
| - "label_skew:<k>" e.g., "label_skew:2" | ||
| - "qty_skew:beta<b>" e.g., "qty_skew:beta0.5" | ||
| seed: RNG seed for reproducibility. | ||
| Returns: | ||
| List of length `num_clients`, where each item is a list of sample indices | ||
| assigned to that client. | ||
| """ | ||
| if num_clients < 1: | ||
| raise ValueError("num_clients must be >= 1") | ||
| if targets.ndim != 1: | ||
| raise ValueError("targets must be a 1D array of integer labels") | ||
| mode, params = _parse_mode(mode_str) | ||
| if num_clients == 1 or mode == "iid": | ||
| rng = np.random.default_rng(seed) | ||
| idxs = np.arange(len(targets)) | ||
| rng.shuffle(idxs) | ||
| splits = np.array_split(idxs, num_clients) | ||
| return [s.tolist() for s in splits] | ||
| if mode == "dirichlet": | ||
| alpha = float(params["alpha"]) | ||
| return _partition_dirichlet(targets, num_clients, alpha, seed) | ||
| if mode == "label_skew": | ||
| n_labels = int(params["n_labels"]) | ||
| return _partition_label_skew(targets, num_clients, n_labels, seed) | ||
| if mode == "qty_skew": | ||
| beta = float(params["beta"]) | ||
| return _partition_quantity_skew(targets, num_clients, beta, seed) | ||
| raise ValueError(f"Unsupported mode: {mode_str}") | ||
| # -------- Mode parsing -------- | ||
| def _parse_mode(s: str) -> Tuple[str, Dict]: | ||
| """ | ||
| Parse mode string into (mode, params) dict. | ||
| """ | ||
| if not s: | ||
| return "iid", {} | ||
| s = s.strip().lower() | ||
| if s == "iid": | ||
| return "iid", {} | ||
| if s.startswith("dirichlet:"): | ||
| try: | ||
| alpha = float(s.split(":", 1)[1]) | ||
| except Exception as e: | ||
| raise ValueError(f"Invalid dirichlet spec: {s}") from e | ||
| return "dirichlet", {"alpha": alpha} | ||
| if s.startswith("label_skew:"): | ||
| try: | ||
| n = int(s.split(":", 1)[1]) | ||
| except Exception as e: | ||
| raise ValueError(f"Invalid label_skew spec: {s}") from e | ||
| return "label_skew", {"n_labels": n} | ||
| if s.startswith("qty_skew:beta"): | ||
| try: | ||
| beta = float(s.split("beta", 1)[1]) | ||
| except Exception as e: | ||
| raise ValueError(f"Invalid qty_skew spec: {s}") from e | ||
| return "qty_skew", {"beta": beta} | ||
| # Fallback | ||
| return "iid", {} | ||
| # -------- Partition strategies -------- | ||
| def _partition_dirichlet( | ||
| targets: np.ndarray, | ||
| num_clients: int, | ||
| alpha: float, | ||
| seed: int, | ||
| ) -> List[List[int]]: | ||
| """ | ||
| Class-wise Dirichlet sampling over clients; lower alpha => more skew. | ||
| """ | ||
| if alpha <= 0: | ||
| raise ValueError("alpha must be > 0 for dirichlet") | ||
| rng = np.random.default_rng(seed) | ||
| n_classes = int(targets.max()) + 1 | ||
| idx_by_class = [np.where(targets == c)[0] for c in range(n_classes)] | ||
| for arr in idx_by_class: | ||
| rng.shuffle(arr) | ||
| parts: List[List[int]] = [[] for _ in range(num_clients)] | ||
| for idxs in idx_by_class: | ||
| if len(idxs) == 0: | ||
| continue | ||
| p = rng.dirichlet([alpha] * num_clients) | ||
| counts = (p * len(idxs)).astype(int) | ||
| # Adjust rounding so sum(counts) == len(idxs) | ||
| while counts.sum() < len(idxs): | ||
| counts[int(np.argmax(p))] += 1 | ||
| while counts.sum() > len(idxs): | ||
| counts[int(np.argmax(counts))] -= 1 | ||
| start = 0 | ||
| for k in range(num_clients): | ||
| take = int(counts[k]) | ||
| if take > 0: | ||
| parts[k].extend(idxs[start : start + take].tolist()) | ||
| start += take | ||
| for k in range(num_clients): | ||
| rng.shuffle(parts[k]) | ||
| return parts | ||
| def _partition_label_skew( | ||
| targets: np.ndarray, | ||
| num_clients: int, | ||
| n_labels: int, | ||
| seed: int, | ||
| ) -> List[List[int]]: | ||
| """ | ||
| Each client gets samples from only n_labels distinct classes (hard label skew). | ||
| """ | ||
| if n_labels < 1: | ||
| raise ValueError("n_labels must be >= 1") | ||
| rng = np.random.default_rng(seed) | ||
| n_classes = int(targets.max()) + 1 | ||
| idx_by_class = [np.where(targets == c)[0] for c in range(n_classes)] | ||
| for arr in idx_by_class: | ||
| rng.shuffle(arr) | ||
| parts: List[List[int]] = [[] for _ in range(num_clients)] | ||
| # Round-robin class assignment (with wrap-around) | ||
| perm = rng.permutation(n_classes) | ||
| assigned = [] | ||
| for k in range(num_clients): | ||
| start = (k * n_labels) % n_classes | ||
| assigned.append(set(perm[start : start + n_labels])) | ||
| # Distribute each class to the clients that were assigned that class | ||
| for c in range(n_classes): | ||
| candidates = [k for k in range(num_clients) if c in assigned[k]] | ||
| if not candidates: | ||
| candidates = [int(rng.integers(0, num_clients))] | ||
| splits = np.array_split(idx_by_class[c], len(candidates)) | ||
| for k, chunk in zip(candidates, splits): | ||
| parts[k].extend(chunk.tolist()) | ||
| for k in range(num_clients): | ||
| rng.shuffle(parts[k]) | ||
| return parts | ||
| def _partition_quantity_skew( | ||
| targets: np.ndarray, | ||
| num_clients: int, | ||
| beta: float, | ||
| seed: int, | ||
| ) -> List[List[int]]: | ||
| """ | ||
| Vary only the number of samples per client (no class preference). | ||
| Lower beta => more variance in quantities. | ||
| """ | ||
| if beta <= 0: | ||
| raise ValueError("beta must be > 0 for qty_skew") | ||
| rng = np.random.default_rng(seed) | ||
| idxs = np.arange(len(targets)) | ||
| rng.shuffle(idxs) | ||
| p = rng.dirichlet([beta] * num_clients) | ||
| counts = (p * len(idxs)).astype(int) | ||
| # Adjust rounding | ||
| while counts.sum() < len(idxs): | ||
| counts[int(np.argmax(p))] += 1 | ||
| while counts.sum() > len(idxs): | ||
| counts[int(np.argmax(counts))] -= 1 | ||
| parts: List[List[int]] = [] | ||
| start = 0 | ||
| for k in range(num_clients): | ||
| take = int(counts[k]) | ||
| parts.append(idxs[start : start + take].tolist()) | ||
| start += take | ||
| return parts |
| Metadata-Version: 2.2 | ||
| Name: fedops | ||
| Version: 1.1.30.0 | ||
| Version: 1.1.30.1 | ||
| Summary: FL Lifecycle Operations Management Platform | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/gachon-CCLab/FedOps.git |
@@ -34,2 +34,3 @@ README.md | ||
| fedops/utils/fedco/best_keeper.py | ||
| fedops/utils/fedco/datasetting.py | ||
| fedops/utils/fedxai/__init__.py | ||
@@ -36,0 +37,0 @@ fedops/utils/fedxai/gradcam.py |
@@ -21,2 +21,3 @@ # Copyright 2020 Adap GmbH. All Rights Reserved. | ||
| "best_keeper" | ||
| "datasetting" | ||
| ] |
+1
-1
| Metadata-Version: 2.2 | ||
| Name: fedops | ||
| Version: 1.1.30.0 | ||
| Version: 1.1.30.1 | ||
| Summary: FL Lifecycle Operations Management Platform | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/gachon-CCLab/FedOps.git |
+1
-1
@@ -7,3 +7,3 @@ from setuptools import setup, find_packages | ||
| name='fedops', | ||
| version='1.1.30.0', | ||
| version='1.1.30.1', | ||
| author='Semo Yang', | ||
@@ -10,0 +10,0 @@ author_email='tpah20@gmail.com', |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
150417
4.48%41
2.5%2987
6.34%