lapx
Advanced tools
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Tuple, Union | ||
| from ._lapjv import lapjv as _lapjv | ||
def lapjv(
    cost: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Solve the Linear Assignment Problem with the Jonker-Volgenant algorithm.

    Thin wrapper over the Cython kernel; returns lapjv-style mapping vectors:
      - x[i] is the column assigned to row i, or -1 if row i is unassigned.
      - y[j] is the row assigned to column j, or -1 if column j is unassigned.

    Parameters
    ----------
    cost : np.ndarray, shape (N, M)
        2D cost matrix; cost[i, j] is the cost of pairing row i with column j.
        Any float dtype is accepted; a single contiguous float64 buffer is
        used internally when needed.
    extend_cost : bool, default False
        Allow rectangular inputs by zero-padding to a square matrix
        (see the unified augmentation policy below).
    cost_limit : float, default np.inf
        When finite, the problem is augmented to size (N + M) with sentinel
        edges of cost_limit / 2 and a bottom-right zero block, modeling a
        per-edge "reject" cost; rectangular inputs are then accepted even
        with extend_cost=False.
    return_cost : bool, default True
        Prepend the total assignment cost, computed from the ORIGINAL
        (un-augmented/unpadded) input, to the returned tuple.

    Returns
    -------
    (total_cost, x, y) when return_cost is True, else (x, y), where
    x is int32 with shape (N,) and y is int32 with shape (M,);
    -1 marks an unassigned row/column.

    Notes
    -----
    Unified augmentation policy:
      - cost_limit < inf: always augment to (N + M) (rectangular allowed).
      - else if N != M or extend_cost=True: zero-pad to max(N, M) square.
      - else: solve the given square matrix directly.
    Orientation is normalized internally (kernel works with rows <= cols);
    outputs are mapped back to the ORIGINAL orientation. Zero-sized inputs
    yield 0.0 (if requested) and all -1 mappings. This wrapper forwards to
    the Cython implementation without altering dtypes.
    """
    # All normalization (orientation, padding, augmentation) happens inside
    # the Cython implementation; forward the arguments untouched.
    options = dict(extend_cost=extend_cost, cost_limit=cost_limit, return_cost=return_cost)
    return _lapjv(cost, **options)
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Tuple, Union | ||
| from ._lapjvc import lapjvc as _lapjvc # type: ignore | ||
def lapjvc(
    cost: np.ndarray,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Solve the LAP with the classic dense Jonker-Volgenant algorithm.

    Thin wrapper around the C++ binding. Produces row/column index arrays
    in the same ordering convention as scipy.optimize.linear_sum_assignment.

    Parameters
    ----------
    cost : np.ndarray, shape (M, N)
        2D cost matrix. Supported dtypes: int32, int64, float32, float64.
        Rectangular inputs are handled internally (the dense solver pads as
        needed); NaN entries in float inputs mark forbidden assignments.
    return_cost : bool, default True
        When True, return (total_cost, row_indices, col_indices);
        otherwise return (row_indices, col_indices) only.

    Returns
    -------
    total_cost : float, optional
        Sum of cost at the selected (row, col) pairs (only if return_cost).
    row_indices, col_indices : np.ndarray, shape (K,)
        Matched row and column indices (platform-default integer dtype).

    Notes
    -----
    This is the classic dense JV routine; for very large, sparse, or
    otherwise structured problems, prefer the lapjv/lapjvx variants.
    """
    # The binding performs padding and NaN handling itself; just forward.
    return _lapjvc(cost, return_cost=return_cost)
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import os | ||
| import numpy as np | ||
| from typing import List, Tuple, Union | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from ._lapjvs_wp import lapjvs as _lapjvs_single | ||
| from ._lapjvs_wp import lapjvsa as _lapjvsa_single | ||
| def _normalize_threads(n_threads: int) -> int: | ||
| if n_threads is None or n_threads == 0: | ||
| cpu = os.cpu_count() or 1 | ||
| return max(1, int(cpu)) | ||
| return max(1, int(n_threads)) | ||
def _solve_one_jvs(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    jvx_like: bool,
    return_cost: bool,
):
    """Run the proven single-instance lapjvs wrapper on one 2D cost matrix.

    The underlying native kernel releases the GIL internally, so concurrent
    calls from a thread pool can overlap.
    """
    return _lapjvs_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        jvx_like=jvx_like,
        prefer_float32=prefer_float32,
    )
def _solve_one_jvsa(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    return_cost: bool,
):
    """Run the single-instance lapjvsa wrapper on one 2D cost matrix."""
    return _lapjvsa_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        prefer_float32=prefer_float32,
    )
def lapjvs_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """Batched lapjvs solver with a thread pool.

    Runs `lapjvs` on each (N, M) slice of a (B, N, M) batch and aggregates
    the per-instance results, preserving batch order in the outputs.

    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices (float32/float64); each `costs[b]` is one
        LAP instance.
    extend_cost : bool, default False
        If True, rectangular instances are solved via internal zero-padding;
        if False, each instance must be square or a ValueError is raised.
        Forwarded to the single-instance solver.
    return_cost : bool, default True
        If True, return per-instance totals as the first output.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`; actual
        workers are capped to the batch size.
    prefer_float32 : bool, default True
        Hint to run each kernel in float32 (forwarded to the single solver).

    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
            Total cost per instance, computed from the ORIGINAL matrices.
        rows_list : List[np.ndarray[int64]]
            Per instance, assigned row indices (length K_b).
        cols_list : List[np.ndarray[int64]]
            Per instance, assigned col indices (length K_b).
    Else:
        rows_list, cols_list

    Raises
    ------
    ValueError
        If `costs` is not 3D, or an instance is rectangular while
        `extend_cost=False`.

    Notes
    -----
    The native kernel releases the GIL, so multiple threads can speed up
    large batches. Totals are accumulated in float64.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one instance; returns its batch index for ordered reassembly.
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _solve_one_jvs(  # type: ignore
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
                jvx_like=True, return_cost=True
            )
            return bi, total, rows, cols
        rows, cols = _solve_one_jvs(  # type: ignore
            a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
            jvx_like=True, return_cost=False
        )
        return bi, None, rows, cols

    def _store(idx: int, t, r, c) -> None:
        # Shared between sequential and threaded paths.
        if return_cost:
            totals[idx] = float(t)  # type: ignore
        rows_list[idx] = np.asarray(r, dtype=np.int64)
        cols_list[idx] = np.asarray(c, dtype=np.int64)

    if B == 0:
        # Fix: an empty batch previously reached ThreadPoolExecutor with
        # max_workers=0 (when threads > 1), which raises ValueError.
        pass
    elif threads == 1 or B == 1:
        for bi in range(B):
            idx, t, r, c = work(bi)
            _store(idx, t, r, c)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, r, c = fut.result()
                _store(idx, t, r, c)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    return rows_list, cols_list
def lapjvsa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """Batched lapjvsa solver, returning (K_b, 2) pair arrays per instance.

    Runs `lapjvsa` on each (N, M) slice of a (B, N, M) batch and aggregates
    the results while preserving batch order.

    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices.
    extend_cost : bool, default False
        If True, rectangular instances are solved via internal zero-padding.
    return_cost : bool, default True
        If True, include per-instance totals as the first returned array.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`.
    prefer_float32 : bool, default True
        Forwarded to the single-instance solver.

    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
        pairs_list : List[np.ndarray[int64] with shape (K_b, 2)]
    Else:
        pairs_list

    Raises
    ------
    ValueError
        If `costs` is not 3D, or an instance is rectangular while
        `extend_cost=False`.

    Notes
    -----
    See `lapjvsa` for dtype handling and total-cost accumulation; results
    are reassembled in batch order irrespective of threading.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one instance; returns its batch index for ordered reassembly.
        a2d = A[bi]
        if return_cost:
            total, pairs = _solve_one_jvsa(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=True
            )
            return bi, total, pairs
        pairs = _solve_one_jvsa(
            a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=False
        )
        return bi, None, pairs

    def _store(idx: int, t, P) -> None:
        # Shared between sequential and threaded paths.
        if return_cost:
            totals[idx] = float(t)  # type: ignore
        pairs_list[idx] = np.asarray(P, dtype=np.int64)

    if B == 0:
        # Fix: an empty batch previously reached ThreadPoolExecutor with
        # max_workers=0 (when threads > 1), which raises ValueError.
        pass
    elif threads == 1 or B == 1:
        for bi in range(B):
            idx, t, P = work(bi)
            _store(idx, t, P)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, P = fut.result()
                _store(idx, t, P)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Optional, Tuple, Union | ||
| from ._lapjvs import lapjvs_native as _lapjvs_native # type: ignore | ||
| from ._lapjvs import lapjvs_float32 as _lapjvs_float32 # type: ignore | ||
| from ._lapjvs import lapjvsa_native as _lapjvsa_native # type: ignore | ||
| from ._lapjvs import lapjvsa_float32 as _lapjvsa_float32 # type: ignore | ||
def lapjvs(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    jvx_like: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray]
]:
    """Solve the LAP with a high-performance JV solver and flexible output.

    This function wraps a high-performance JV solver and provides flexible
    I/O to match either lapjv-style vector outputs (x, y) or lapjvx/SciPy-style
    pair lists (rows, cols). It handles rectangular inputs by zero-padding to a
    square matrix internally when requested.

    Parameters
    ----------
    cost : np.ndarray, shape (n, m)
        The cost matrix. Must be 2D and a real floating dtype. Values are treated
        as minimization costs. Rectangular matrices are supported via internal
        zero-padding when `extend_cost=True` or `extend_cost=None and n != m`.
    extend_cost : Optional[bool], default None
        Controls how rectangular inputs are handled:
        - True: Always zero-pad to a square internally (if needed).
        - False: Require a square matrix, otherwise raise ValueError.
        - None: Auto mode; pad iff the input is rectangular.
    return_cost : bool, default True
        If True, include the total assignment cost as the first return value.
        The total is always recomputed from the ORIGINAL input array `cost`
        (float64 accumulation) to match previous numeric behavior.
    jvx_like : bool, default True
        Selects the output format.
        - True: Return lapjvx/SciPy-style indexing arrays:
            return_cost=True  -> (total_cost: float, rows: (k,), cols: (k,))
            return_cost=False -> (rows: (k,), cols: (k,))
          Here, `rows[i]` is assigned to `cols[i]`.
        - False: Return lapjv-style mapping vectors:
            return_cost=True  -> (total_cost: float, x: (n0,), y: (m0,))
            return_cost=False -> (x: (n0,), y: (m0,))
          `x[i]` gives the assigned column for row i or -1 if unassigned.
          `y[j]` gives the assigned row for column j or -1 if unassigned.
    prefer_float32 : bool, default True
        When True, the solver kernel runs in float32 to reduce memory bandwidth
        and improve speed. When False and the input is float64, the kernel runs
        in float64. Regardless of kernel dtype, the returned total cost is
        recomputed against the ORIGINAL `cost` array.

    Returns
    -------
    See `jvx_like` and `return_cost` above for exact signatures. In all cases,
    index arrays are int64 and refer to indices in the ORIGINAL orientation of
    `cost` (not the internally transposed one).

    Raises
    ------
    ValueError
        - If `cost` is not a 2D array.
        - If `extend_cost=False` and the input matrix is rectangular.

    Notes
    -----
    - Rectangular handling:
        Internally, the solver normalizes orientation so that the working matrix
        has rows <= cols. Rectangular problems are modeled by zero-padding on
        the right and/or bottom to become square. Only assignments within the
        original (n, m) region are returned and used for the total.
    - Dtype:
        The kernel may operate in float32 or float64, but accumulation for the
        returned total cost is performed in float64 on the ORIGINAL `cost`.
    """
    # Keep the original array to compute the final cost from it (preserves previous behavior)
    A = np.asarray(cost)
    if A.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n0, m0 = A.shape
    transposed = False
    # Normalize orientation for performance: let the kernel see rows <= cols.
    if n0 > m0:
        B = np.ascontiguousarray(A.T)
        transposed = True
    else:
        B = np.ascontiguousarray(A)
    n, m = B.shape
    # Auto mode: pad iff the working matrix is rectangular.
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    # Choose backend and working dtype for the solver only.
    # Note: float32 kernel is used for all dtypes unless the caller explicitly
    # passed prefer_float32=False AND the input is already float64.
    use_float32_kernel = not ((prefer_float32 is False) and (B.dtype == np.float64))
    if use_float32_kernel:
        _kernel = _lapjvs_float32
        work_base = np.ascontiguousarray(B, dtype=np.float32)
    else:
        _kernel = _lapjvs_native
        work_base = np.ascontiguousarray(B, dtype=np.float64)

    def _rows_cols_from_x(x_vec: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        # Convert a lapjv-style x vector (B orientation) into (rows, cols)
        # pair arrays in the ORIGINAL (A) orientation.
        if x_vec.size == 0:
            return np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64)
        mask = x_vec >= 0
        rows_b = np.nonzero(mask)[0].astype(np.int64, copy=False)
        cols_b = x_vec[mask].astype(np.int64, copy=False)
        if not transposed:
            return rows_b, cols_b
        # Map back to original orientation (A): swap row/col
        return cols_b, rows_b

    if not extend:
        # Square: call solver directly on chosen dtype, compute total from ORIGINAL A
        if n != m:
            # Guard (per docstring): if extend_cost=False, require square input
            raise ValueError("extend_cost=False requires a square cost matrix")
        x_raw_obj, y_raw_obj = _kernel(work_base)
        x_raw_b = np.asarray(x_raw_obj, dtype=np.int64)
        if jvx_like:
            rows_a, cols_a = _rows_cols_from_x(x_raw_b)
            if return_cost:
                total = float(A[rows_a, cols_a].sum()) if rows_a.size else 0.0
                return total, rows_a, cols_a
            else:
                return rows_a, cols_a
        else:
            # Return vectors (x, y) in original orientation
            y_raw_b = np.asarray(y_raw_obj, dtype=np.int64)
            if not transposed:
                if return_cost:
                    # Square un-transposed case: every row is assumed assigned,
                    # so index directly with x_raw_b.
                    total = float(A[np.arange(n), x_raw_b].sum()) if n > 0 else 0.0
                    return total, x_raw_b, y_raw_b
                else:
                    return x_raw_b, y_raw_b
            # transposed square should not happen (n0==m0 implies no transpose), but keep safe mapping
            # Build pairs from B then map to A vectors
            rows_a, cols_a = _rows_cols_from_x(x_raw_b)
            x_out = np.full(n0, -1, dtype=np.int64)
            if rows_a.size:
                x_out[rows_a] = cols_a
            y_out = np.full(m0, -1, dtype=np.int64)
            if rows_a.size:
                y_out[cols_a] = rows_a
            if return_cost:
                total = float(A[rows_a, cols_a].sum()) if rows_a.size else 0.0
                return total, x_out, y_out
            else:
                return x_out, y_out
    # Rectangular: zero-pad to square (in B space), solve, map back; compute total from ORIGINAL A
    size = max(n, m)
    padded = np.empty((size, size), dtype=work_base.dtype)
    # copy original submatrix
    padded[:n, :m] = work_base
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    x_pad_obj, y_pad_obj = _kernel(padded)
    x_pad_b = np.asarray(x_pad_obj, dtype=np.int64)
    # Trim to original rectangle (B space), then map to A space if needed
    cols_pad_n = x_pad_b[:n]
    # Keep only assignments that land inside the original (n, m) region;
    # matches to padding columns are treated as "unassigned".
    mask_r_b = (cols_pad_n >= 0) & (cols_pad_n < m)
    # Prepare pairs in A-space for convenience
    if mask_r_b.any():
        rows_b = np.nonzero(mask_r_b)[0].astype(np.int64, copy=False)
        cols_b = cols_pad_n[mask_r_b].astype(np.int64, copy=False)
        if transposed:
            rows_a = cols_b
            cols_a = rows_b
        else:
            rows_a = rows_b
            cols_a = cols_b
    else:
        rows_a = np.empty((0,), dtype=np.int64)
        cols_a = np.empty((0,), dtype=np.int64)
    if jvx_like:
        total = float(A[rows_a, cols_a].sum()) if (return_cost and rows_a.size) else 0.0
        return (total, rows_a, cols_a) if return_cost else (rows_a, cols_a)
    # lapjv-like outputs (vectorized) in ORIGINAL orientation
    x_out = np.full(n0, -1, dtype=np.int64)
    y_out = np.full(m0, -1, dtype=np.int64)
    if rows_a.size:
        x_out[rows_a] = cols_a
        y_out[cols_a] = rows_a
    if return_cost and rows_a.size:
        total = float(A[rows_a, cols_a].sum())
    else:
        total = 0.0
    return (total, x_out, y_out) if return_cost else (x_out, y_out)
def lapjvsa(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray],
    np.ndarray
]:
    """Solve the LAP and return a compact (K, 2) array of assignment pairs.

    This variant returns a compact pairs array of shape (K, 2), where each row
    is a (row_index, col_index) assignment in the ORIGINAL orientation of the
    input matrix. Rectangular inputs are handled by internal zero-padding if
    requested.

    Parameters
    ----------
    cost : np.ndarray, shape (n, m)
        Cost matrix (float32/float64). Must be 2D.
    extend_cost : Optional[bool], default None
        Rectangular handling:
        - True: Zero-pad to square internally (if needed).
        - False: Require square, else raise ValueError.
        - None: Auto; pad iff rectangular.
    return_cost : bool, default True
        If True, include the total cost as the first return value. The total is
        computed from the ORIGINAL input matrix.
    prefer_float32 : bool, default True
        Hint to run the solver kernel in float32 for performance. When False and
        the input is float64, the kernel uses float64.

    Returns
    -------
    If return_cost is True:
        (total_cost: float, pairs: np.ndarray[int64] with shape (K, 2))
    Else:
        pairs: np.ndarray[int64] with shape (K, 2)

    Raises
    ------
    ValueError
        If `cost` is not 2D, or if `extend_cost=False` and `cost` is rectangular.

    Notes
    -----
    - Orientation is normalized internally so the kernel sees rows <= cols.
      Returned pairs are always mapped back to the ORIGINAL orientation.
    - Pairs only include assignments within the original (n, m) region for
      rectangular inputs.
    - Total is accumulated in float64 from the ORIGINAL `cost`.
    """
    A = np.asarray(cost)
    if A.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n0, m0 = A.shape
    transposed = False
    # Normalize orientation for performance
    if n0 > m0:
        B = np.ascontiguousarray(A.T)
        transposed = True
    else:
        B = np.ascontiguousarray(A)
    n, m = B.shape
    # Auto mode: pad iff the working matrix is rectangular.
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    # Select dtype/backend
    # float32 kernel is used unless the caller explicitly passed
    # prefer_float32=False AND the input is already float64.
    use_f32 = not ((prefer_float32 is False) and (B.dtype == np.float64))
    wdtype = np.float32 if use_f32 else (B.dtype if B.dtype in (np.float32, np.float64) else np.float64)
    if not extend:
        if n != m:
            raise ValueError("extend_cost=False requires a square cost matrix")
        work = np.ascontiguousarray(B, dtype=wdtype)
        pairs_b_obj = (_lapjvsa_float32(work) if use_f32 else _lapjvsa_native(work))
        pairs_b = np.asarray(pairs_b_obj, dtype=np.int64)
        # Map pairs back to original orientation
        if transposed and pairs_b.size:
            pairs_a = pairs_b[:, ::-1].astype(np.int64, copy=False)
        else:
            pairs_a = pairs_b
        if return_cost:
            if pairs_a.size:
                r = pairs_a[:, 0]; c = pairs_a[:, 1]
                # Total is accumulated from the ORIGINAL array A, not the
                # (possibly down-cast) working copy.
                total = float(A[r, c].sum())
            else:
                total = 0.0
            return total, pairs_a
        return pairs_a
    # Rectangular: zero-pad in B space, solve, trim, map back to A
    size = max(n, m)
    padded = np.empty((size, size), dtype=wdtype)
    padded[:n, :m] = B.astype(wdtype, copy=False)
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    pairs_pad_b_obj = (_lapjvsa_float32(padded) if use_f32 else _lapjvsa_native(padded))
    pairs_pad_b = np.asarray(pairs_pad_b_obj, dtype=np.int64)
    if pairs_pad_b.size == 0 or n == 0 or m == 0:
        # Degenerate input: no assignments possible.
        pairs_a = np.empty((0, 2), dtype=np.int64)
        total = 0.0
    else:
        r_b = pairs_pad_b[:, 0]
        c_b = pairs_pad_b[:, 1]
        # Keep only pairs that land inside the original (n, m) region;
        # matches into the zero-padding are discarded.
        mask_b = (r_b >= 0) & (r_b < n) & (c_b >= 0) & (c_b < m)
        if mask_b.any():
            pairs_b = np.stack([r_b[mask_b], c_b[mask_b]], axis=1).astype(np.int64, copy=False)
            # Map back to A orientation if needed
            pairs_a = pairs_b[:, ::-1] if transposed else pairs_b
            if return_cost and pairs_a.size:
                total = float(A[pairs_a[:, 0], pairs_a[:, 1]].sum())
            else:
                total = 0.0
        else:
            pairs_a = np.empty((0, 2), dtype=np.int64)
            total = 0.0
    return (total, pairs_a) if return_cost else pairs_a
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import os | ||
| import numpy as np | ||
| from typing import List, Tuple, Union | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from ._lapjvx import lapjvx as _lapjvx_single # type: ignore | ||
| from ._lapjvx import lapjvxa as _lapjvxa_single # type: ignore | ||
| def _normalize_threads(n_threads: int) -> int: | ||
| if not n_threads: | ||
| return max(1, int(os.cpu_count() or 1)) | ||
| return max(1, int(n_threads)) | ||
def lapjvx_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """Batched lapjvx solver with a thread pool.

    Applies the JVX-style solver (`lapjvx`) across a batch of cost matrices
    and aggregates the results, preserving batch order and supporting
    multi-threaded execution.

    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices (float32/float64).
    extend_cost : bool, default False
        If True, rectangular matrices are handled via internal zero-padding;
        if False, instances must be square.
    cost_limit : float, default np.inf
        Per-instance threshold to prune/limit assignments, forwarded to the
        underlying `lapjvx` implementation.
    return_cost : bool, default True
        If True, returns per-instance totals first.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`.

    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
        rows_list : List[np.ndarray[int64]] of length B
        cols_list : List[np.ndarray[int64]] of length B
    Else:
        rows_list, cols_list

    Raises
    ------
    ValueError
        If `costs` is not 3D, or an instance is rectangular while
        `extend_cost=False`.

    Notes
    -----
    See the single-instance `lapjvx` for detailed behavior around
    `extend_cost` and `cost_limit`.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one instance; returns its batch index for ordered reassembly.
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _lapjvx_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, rows, cols
        rows, cols = _lapjvx_single(
            a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
        )
        return bi, None, rows, cols

    def _store(i: int, t, r, c) -> None:
        # Shared between sequential and threaded paths.
        if return_cost:
            totals[i] = float(t)  # type: ignore
        rows_list[i] = np.asarray(r, dtype=np.int64)
        cols_list[i] = np.asarray(c, dtype=np.int64)

    if B == 0:
        # Fix: an empty batch previously reached ThreadPoolExecutor with
        # max_workers=0 (when threads > 1), which raises ValueError.
        pass
    elif threads == 1 or B == 1:
        for bi in range(B):
            i, t, r, c = work(bi)
            _store(i, t, r, c)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, r, c = fut.result()
                _store(i, t, r, c)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    return rows_list, cols_list
def lapjvxa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """Batched lapjvxa solver, returning (K_b, 2) pair arrays per instance.

    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices.
    extend_cost : bool, default False
        If True, rectangular matrices are solved by internal zero-padding.
    cost_limit : float, default np.inf
        Forwarded to `lapjvxa` to limit/prune assignments.
    return_cost : bool, default True
        If True, includes per-instance totals as the first returned array.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`.

    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
        pairs_list : List[np.ndarray[int64] with shape (K_b, 2)]
    Else:
        pairs_list

    Raises
    ------
    ValueError
        If `costs` is not 3D, or an instance is rectangular while
        `extend_cost=False`.

    Notes
    -----
    See `lapjvxa` for single-instance behavior and `cost_limit` semantics;
    results are returned in the original batch order.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one instance; returns its batch index for ordered reassembly.
        a2d = A[bi]
        if return_cost:
            total, pairs = _lapjvxa_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, pairs
        pairs = _lapjvxa_single(
            a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
        )
        return bi, None, pairs

    def _store(i: int, t, P) -> None:
        # Shared between sequential and threaded paths.
        if return_cost:
            totals[i] = float(t)  # type: ignore
        pairs_list[i] = np.asarray(P, dtype=np.int64)

    if B == 0:
        # Fix: an empty batch previously reached ThreadPoolExecutor with
        # max_workers=0 (when threads > 1), which raises ValueError.
        pass
    elif threads == 1 or B == 1:
        for bi in range(B):
            i, t, P = work(bi)
            _store(i, t, P)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, P = fut.result()
                _store(i, t, P)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Tuple, Union | ||
| from ._lapjvx import lapjvx as _lapjvx # type: ignore | ||
| from ._lapjvx import lapjvxa as _lapjvxa # type: ignore | ||
def lapjvx(
    cost: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Solve the LAP with the Jonker-Volgenant algorithm, returning
    scipy-style (row_indices, col_indices) assignment vectors.

    Parameters
    ----------
    cost : np.ndarray, shape (N, M)
        2D cost matrix; any float dtype is accepted (a contiguous float64
        working buffer is used internally when required).
    extend_cost : bool, default False
        Allow rectangular inputs by zero-padding to a square matrix.
    cost_limit : float, default np.inf
        If finite, the problem is augmented to size (N+M) with sentinel
        edges of cost_limit/2 and a bottom-right zero block, modeling a
        per-edge reject cost (this also permits rectangular inputs).
    return_cost : bool, default True
        If True, the total assignment cost is returned first; the total is
        computed on the ORIGINAL (un-augmented, unpadded) input.

    Returns
    -------
    (total_cost, row_indices, col_indices) when return_cost is True,
    otherwise (row_indices, col_indices). Indices are mapped back to the
    ORIGINAL matrix orientation; row_indices is int64, col_indices is
    typically int32 (follows the Cython implementation / platform).

    Notes
    -----
    Augmentation policy: a finite cost_limit augments to (N+M); otherwise a
    rectangular input (or extend_cost=True) is zero-padded to a square
    matrix of size max(N, M); a square input runs as-is.
    """
    # All heavy lifting happens in the compiled kernel; this wrapper only
    # forwards the arguments unchanged.
    return _lapjvx(
        cost,
        extend_cost=extend_cost,
        cost_limit=cost_limit,
        return_cost=return_cost,
    )
def lapjvxa(
    cost: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray],
    np.ndarray,
]:
    """Solve the LAP like ``lapjvx`` but return assignments as a single
    compact (K, 2) ndarray of (row, col) pairs.

    Parameters
    ----------
    cost : np.ndarray, shape (N, M)
        2D cost matrix.
    extend_cost : bool, default False
        Allow rectangular inputs by zero-padding to a square matrix.
    cost_limit : float, default np.inf
        If finite, augment to (N+M) to model a per-edge reject cost
        (see ``lapjvx``).
    return_cost : bool, default True
        If True, the total cost (computed on the ORIGINAL input) comes first.

    Returns
    -------
    (total_cost, assignments) when return_cost is True, otherwise just
    assignments: an int32 array of shape (K, 2) whose rows are
    (row_index, col_index) in the ORIGINAL orientation.
    """
    # Thin convenience wrapper over the compiled lapjvxa kernel.
    return _lapjvxa(
        cost,
        extend_cost=extend_cost,
        cost_limit=cost_limit,
        return_cost=return_cost,
    )
| # Copyright (c) 2012-2025 Tomas Kazmar | BSD-2-Clause License | ||
| import numpy as np | ||
| import numpy.typing as npt | ||
| from bisect import bisect_left | ||
| from typing import Tuple, Union | ||
| # import logging | ||
| from ._lapjv import _lapmod, FP_DYNAMIC_ as FP_DYNAMIC, LARGE_ as LARGE # type: ignore | ||
def _pycrrt(n, cc, ii, kk, free_rows, x, y, v):
    """Column reduction and reduction transfer (sparse JV preprocessing).

    Operates on the sparse CSR-like representation (cc: values, ii: row
    starts, kk: sorted column indices). Builds a partial assignment in
    x (row -> col) and y (col -> row), initializes the dual prices v, and
    writes the indices of still-unassigned rows into free_rows.
    Returns the number of free rows. x, y, v, free_rows are mutated in place.
    """
    # log = logging.getLogger('do_column_reduction_and_reduction_transfer')
    x[:] = -1
    y[:] = -1
    v[:] = LARGE
    # Column reduction: for each column j, v[j] becomes the minimum cost in
    # that column and y[j] the row attaining it (later rows overwrite ties).
    for i in range(n):
        ks = slice(ii[i], ii[i+1])
        js = kk[ks]
        ccs = cc[ks]
        mask = ccs < v[js]
        js = js[mask]
        v[js] = ccs[mask]
        y[js] = i
    # log.debug('v = %s', v)
    # for j in range(cost.shape[1]):
    # unique[i] stays True only if row i is the minimum for exactly one column.
    unique = np.empty((n,), dtype=bool)
    unique[:] = True
    for j in range(n-1, -1, -1):
        i = y[j]
        # If row is not taken yet, initialize it with the minimum stored in y.
        if x[i] < 0:
            x[i] = j
        else:
            unique[i] = False
            y[j] = -1
        # log.debug('bw %s %s %s %s', i, j, x, y)
    # log.debug('unique %s', unique)
    n_free_rows = 0
    for i in range(n):
        # Store unassigned row i.
        if x[i] < 0:
            free_rows[n_free_rows] = i
            n_free_rows += 1
        elif unique[i] and ii[i+1] - ii[i] > 1:
            # >1 check prevents choking on rows with a single entry
            # Transfer from an assigned row.
            j = x[i]
            # Find the current 2nd minimum of the reduced column costs:
            # (cost[i,j] - v[j]) for some j.
            ks = slice(ii[i], ii[i+1])
            js = kk[ks]
            minv = np.min(cc[ks][js != j] - v[js][js != j])
            # log.debug("v[%d] = %f - %f", j, v[j], minv)
            # Lower v[j] so the reduced cost of (i, j) equals the 2nd minimum.
            v[j] -= minv
    # log.debug('free: %s', free_rows[:n_free_rows])
    # log.debug('%s %s', x, v)
    return n_free_rows
def find_minima(indices, values):
    """Return (j1, v1, j2, v2): the column j1 holding the smallest value v1
    and the column j2 holding the second-smallest value v2.

    If `indices` is empty, (0, LARGE, -1, LARGE) is returned; if it has a
    single entry there is no second minimum and j2 stays -1.
    """
    # log = logging.getLogger('find_minima')
    # log.debug(sorted(zip(values, indices))[:2])
    if len(indices) > 0:
        best_j, best_v = indices[0], values[0]
    else:
        best_j, best_v = 0, LARGE
    second_j, second_v = -1, LARGE
    for j, h in zip(indices[1:], values[1:]):
        # log.debug('%d = %f %d = %f', best_j, best_v, second_j, second_v)
        if h >= second_v:
            continue
        if h < best_v:
            # New overall minimum: old best becomes the runner-up.
            second_j, second_v = best_j, best_v
            best_j, best_v = j, h
        else:
            # Between the two minima: only the runner-up changes.
            second_j, second_v = j, h
    # log.debug('%d = %f %d = %f', best_j, best_v, second_j, second_v)
    return best_j, best_v, second_j, second_v
def _pyarr(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augmenting row reduction over the first n_free_rows entries of free_rows.

    For each free row, looks at the two smallest reduced costs in the row and
    either lowers the dual price v[j1] (then immediately continues the
    alternating path from the displaced row) or takes the second-best column.
    Rows left unassigned are compacted to the front of free_rows; returns the
    new count of free rows. x, y, v, free_rows are mutated in place.
    """
    # log = logging.getLogger('do_augmenting_row_reduction')
    # log.debug('%s %s %s', x, y, v)
    current = 0
    # log.debug('free: %s', free_rows[:n_free_rows])
    new_free_rows = 0
    while current < n_free_rows:
        free_i = free_rows[current]
        # log.debug('current = %d', current)
        current += 1
        ks = slice(ii[free_i], ii[free_i+1])
        js = kk[ks]
        # Two smallest reduced costs (cc - v) over this row's entries.
        j1, v1, j2, v2 = find_minima(js, cc[ks] - v[js])
        i0 = y[j1]
        # Lowering v[j1] by the gap (v2 - v1) makes column j1 strictly best.
        v1_new = v[j1] - (v2 - v1)
        v1_lowers = v1_new < v[j1]
        # log.debug(
        #     '%d %d 1=%s,%f 2=%s,%f %f %s',
        #     free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers)
        if v1_lowers:
            v[j1] = v1_new
        elif i0 >= 0 and j2 != -1:  # i0 is assigned, try j2
            j1 = j2
            i0 = y[j2]
        # Assign free_i to the chosen column, possibly displacing row i0.
        x[free_i] = j1
        y[j1] = free_i
        if i0 >= 0:
            if v1_lowers:
                # Re-use the current slot: continue the path from i0 right away.
                current -= 1
                # log.debug('continue augmenting path from current %s %s %s')
                free_rows[current] = i0
            else:
                # log.debug('stop the augmenting path and keep for later')
                free_rows[new_free_rows] = i0
                new_free_rows += 1
    # log.debug('free: %s', free_rows[:new_free_rows])
    return new_free_rows
def binary_search(data, key):
    """Return the index of `key` in the sorted sequence `data`, or None."""
    # log = logging.getLogger('binary_search')
    pos = bisect_left(data, key)
    # log.debug('Found data[%d] for %d', pos, key)
    if pos == len(data) or data[pos] != key:
        return None
    return pos
| def _find(hi, d, cols, y): | ||
| lo, hi = hi, hi + 1 | ||
| minv = d[cols[lo]] | ||
| # XXX: anytime this happens to be NaN, i'm screwed... | ||
| # assert not np.isnan(minv) | ||
| for k in range(hi, len(cols)): | ||
| j = cols[k] | ||
| if d[j] <= minv: | ||
| # New minimum found, trash the new SCAN columns found so far. | ||
| if d[j] < minv: | ||
| hi = lo | ||
| minv = d[j] | ||
| cols[k], cols[hi] = cols[hi], j | ||
| hi += 1 | ||
| return minv, hi, cols | ||
def _scan(n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v):
    """Relax TODO columns from the current SCAN columns (Dijkstra-style step).

    Processes SCAN columns cols[lo:hi]; for each, relaxes the tentative
    distances d[j] of the TODO columns cols[hi:n] through the row assigned to
    the SCAN column, recording predecessors in pred. If a relaxation reaches
    an unassigned column at distance minv, returns early with
    (that column, None, None, d, cols, pred); otherwise returns
    (-1, lo, hi, d, cols, pred) with lo/hi advanced. d, cols, pred are
    mutated in place.
    """
    # log = logging.getLogger('_scan')
    # Scan all TODO columns.
    while lo != hi:
        j = cols[lo]
        lo += 1
        i = y[j]
        # log.debug('?%d kk[%d:%d]=%s', j, ii[i], ii[i+1], kk[ii[i]:ii[i+1]])
        # Locate entry (i, j) in the sparse row; skip if the edge is absent.
        kj = binary_search(kk[ii[i]:ii[i+1]], j)
        if kj is None:
            continue
        kj = ii[i] + kj
        # Offset so reduced costs below are measured relative to minv.
        h = cc[kj] - v[j] - minv
        # log.debug('i=%d j=%d kj=%s h=%f', i, j, kj, h)
        for k in range(hi, n):
            j = cols[k]
            kj = binary_search(kk[ii[i]:ii[i+1]], j)
            if kj is None:
                continue
            kj = ii[i] + kj
            cred_ij = cc[kj] - v[j] - h
            if cred_ij < d[j]:
                d[j] = cred_ij
                pred[j] = i
                if cred_ij == minv:
                    # Unassigned column at the minimum: augmenting path found.
                    if y[j] < 0:
                        return j, None, None, d, cols, pred
                    # Otherwise promote j from TODO to SCAN.
                    cols[k] = cols[hi]
                    cols[hi] = j
                    hi += 1
    return -1, lo, hi, d, cols, pred
def find_path(n, cc, ii, kk, start_i, y, v):
    """Find a shortest augmenting path from free row start_i to an
    unassigned column (sparse Dijkstra over reduced costs).

    Returns (final_j, pred): the unassigned column ending the path and the
    predecessor-row array to backtrack it. Dual prices v for READY columns
    are updated in place.
    """
    # log = logging.getLogger('find_path')
    # cols is partitioned as READY [0:n_ready) | SCAN [lo:hi) | TODO [hi:n).
    cols = np.arange(n, dtype=int)
    pred = np.empty((n,), dtype=int)
    pred[:] = start_i
    d = np.empty((n,), dtype=float)
    d[:] = LARGE
    # Seed distances with the reduced costs of start_i's own row entries.
    ks = slice(ii[start_i], ii[start_i+1])
    js = kk[ks]
    d[js] = cc[ks] - v[js]
    # log.debug('d = %s', d)
    minv = LARGE
    lo, hi = 0, 0
    n_ready = 0
    final_j = -1
    while final_j == -1:
        # No SCAN columns, find new ones.
        if lo == hi:
            # log.debug('%d..%d -> find', lo, hi)
            # log.debug('cols = %s', cols)
            n_ready = lo
            minv, hi, cols = _find(hi, d, cols, y)
            # log.debug('%d..%d -> check', lo, hi)
            # log.debug('cols = %s', cols)
            # log.debug('y = %s', y)
            for h in range(lo, hi):
                # If any of the new SCAN columns is unassigned, use it.
                if y[cols[h]] < 0:
                    final_j = cols[h]
        if final_j == -1:
            # log.debug('%d..%d -> scan', lo, hi)
            final_j, lo, hi, d, cols, pred = _scan(
                n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v)
            # log.debug('d = %s', d)
            # log.debug('cols = %s', cols)
            # log.debug('pred = %s', pred)
    # Update prices for READY columns.
    for k in range(n_ready):  # type: ignore
        j0 = cols[k]
        v[j0] += d[j0] - minv
    assert final_j >= 0
    assert final_j < n
    return final_j, pred
def _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augment along a shortest alternating path for every remaining free row.

    For each of the first n_free_rows entries of free_rows, finds a shortest
    augmenting path ending in an unassigned column and flips the assignments
    along it. x and y are mutated in place (v is updated inside find_path).
    """
    # log = logging.getLogger('augment')
    for row in free_rows[:n_free_rows]:
        # log.debug('looking at free row %s', row)
        end_j, pred = find_path(n, cc, ii, kk, row, y, v)
        # Backtrack from end_j through pred, flipping assignments until the
        # starting free row is reached.
        j = end_j
        i = -1
        while i != row:
            i = pred[j]
            assert i >= 0
            assert i < n
            # log.debug('y[%d]=%d -> %d', j, y[j], i)
            y[j] = i
            next_j = x[i]
            x[i] = j
            j = next_j
def check_cost(n, cc, ii, kk):
    """Validate the sparse cost representation; raise ValueError if invalid.

    Rejects empty problems, negative costs, and costs at or above LARGE.
    """
    if n == 0:
        raise ValueError('Cost matrix has zero rows.')
    if len(kk) == 0:
        raise ValueError('Cost matrix has zero columns.')
    lowest = cc.min()
    highest = cc.max()
    if lowest < 0:
        raise ValueError('Cost matrix values must be non-negative.')
    if highest >= LARGE:
        raise ValueError(
            'Cost matrix values must be less than %s' % LARGE)
def get_cost(n, cc, ii, kk, x0):
    """Total cost of the row->column assignment x0 under the sparse matrix.

    Returns np.inf as soon as any (row, x0[row]) edge is missing from the
    sparse representation.
    """
    total = 0
    for row, col in enumerate(x0):
        pos = binary_search(kk[ii[row]:ii[row + 1]], col)
        if pos is None:
            return np.inf
        total += cc[ii[row] + pos]
    return total
| # def lapmod(n, cc, ii, kk, fast=True, return_cost=True, fp_version=FP_DYNAMIC): | ||
def lapmod(
    n: int,
    cc: npt.NDArray[np.floating],
    ii: npt.NDArray[np.integer],
    kk: npt.NDArray[np.integer],
    fast: bool = True,
    return_cost: bool = True,
    fp_version: int = FP_DYNAMIC,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Solve sparse linear assignment problem using Jonker-Volgenant algorithm.

    n: number of rows of the assignment cost matrix
    cc: 1D array of all finite elements of the assignment cost matrix,
        stored row by row
    ii: 1D array of indices of the row starts in cc (length n+1). The
        following must hold: ii[0] = 0 and ii[n] = len(cc).
    kk: 1D array of the column indices so that:
        cost[i, kk[ii[i] + k]] == cc[ii[i] + k].
        Indices within one row must be sorted.
    fast: use the compiled kernel; if False, run the pure-Python path
        (column reduction, two augmenting-row-reduction passes, then
        augmentation) [default: True]
    return_cost: whether or not to return the assignment cost
    fp_version: forwarded to the compiled kernel; only used when fast is True
        [default: FP_DYNAMIC]
    Returns (opt, x, y) where:
      opt: cost of the assignment
      x: vector of columns assigned to rows
      y: vector of rows assigned to columns
    or (x, y) if return_cost is not True.
    """
    # log = logging.getLogger('lapmod')
    check_cost(n, cc, ii, kk)
    if fast is True:
        # log.debug('[----CR & RT & ARR & augmentation ----]')
        x, y = _lapmod(n, cc, ii, kk, fp_version=fp_version)
    else:
        cc = np.ascontiguousarray(cc, dtype=np.float64)
        ii = np.ascontiguousarray(ii, dtype=np.int32)
        kk = np.ascontiguousarray(kk, dtype=np.int32)
        x = np.empty((n,), dtype=np.int32)
        y = np.empty((n,), dtype=np.int32)
        v = np.empty((n,), dtype=np.float64)
        free_rows = np.empty((n,), dtype=np.int32)
        # log.debug('[----Column reduction & reduction transfer----]')
        n_free_rows = _pycrrt(n, cc, ii, kk, free_rows, x, y, v)
        # log.debug(
        #     'free, x, y, v: %s %s %s %s', free_rows[:n_free_rows], x, y, v)
        if n_free_rows == 0:
            # log.info('Reduction solved it.')
            if return_cost is True:
                return get_cost(n, cc, ii, kk, x), x, y
            else:
                return x, y
        for it in range(2):
            # log.debug('[---Augmenting row reduction (iteration: %d)---]', it)
            n_free_rows = _pyarr(
                n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
            # log.debug(
            #     'free, x, y, v: %s %s %s %s', free_rows[:n_free_rows], x, y, v)
            if n_free_rows == 0:
                # log.info('Augmenting row reduction solved it.')
                if return_cost is True:
                    return get_cost(n, cc, ii, kk, x), x, y
                else:
                    return x, y
        # log.info('[----Augmentation----]')
        _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
        # log.debug('x, y, v: %s %s %s', x, y, v)
    if return_cost is True:
        return get_cost(n, cc, ii, kk, x), x, y
    else:
        return x, y
+477
-452
| [](https://github.com/rathaROG/lapx/releases) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml) | ||
@@ -9,4 +9,4 @@ [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml) | ||
| `lapx` focuses more on real-world applications, and the [benchmark_batch.py](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_batch.py) | ||
| and [benchmark.py](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark.py) are **not** | ||
| `lapx` focuses more on real-world applications, and the [benchmark_batch.py](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_batch.py) | ||
| and [benchmark_single.py](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_single.py) are **not** | ||
| intended for scientific research or competitive evaluation. Instead, it provides a quick and accessible way for | ||
@@ -24,53 +24,68 @@ you to run benchmark tests on your own machine. Below, you will also find a collection of interesting results | ||
| git clone https://github.com/rathaROG/lapx.git | ||
| cd lapx/.github/test | ||
| cd lapx/benchmarks | ||
| python benchmark_batch.py | ||
| python benchmark.py | ||
| python benchmark_single.py | ||
| ``` | ||
| Note: [SciPy](https://pypi.org/project/scipy/) is used as the baseline in the benchmark. | ||
| Note: [SciPy](https://pypi.org/project/scipy/) is used as the baseline in the single-matrix benchmark `benchmark_single.py`. | ||
| 📊 Some benchmark results using `lapx` [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) (2025/10/27): | ||
| 📊 Some benchmark results using `lapx` [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) (2025/10/27): | ||
| <details><summary>🗂️ Batch on my Windows 11 i9-13900KS (8 p-core + 8 e-core) + python 3.9.13:</summary> | ||
| <details><summary>🗂️ Batch on my local Windows 11 i9-13900KS (8 p-core + 8 e-core) + python 3.11.9:</summary> | ||
| ``` | ||
| # 50 x (3000x3000) | n_threads = 24 | ||
| Microsoft Windows [Version 10.0.26200.7019] | ||
| (c) Microsoft Corporation. All rights reserved. | ||
| CPU lapx-batch-jvx : cost=82.08230873, time=1.40321970s | ||
| CPU lapx-batch-jvs : cost=82.08230873, time=0.80294538s | ||
| CPU lapx-batch-jvxa : cost=82.08230873, time=1.40610409s | ||
| CPU lapx-batch-jvsa : cost=82.08230873, time=0.81906796s | ||
| CPU lapx-batch-jvsa64 : cost=82.08230873, time=1.42109323s | ||
| CPU lapx-loop-jvx : cost=82.08230873, time=11.01966000s | ||
| CPU lapx-loop-jvs : cost=82.08230873, time=8.18710470s | ||
| D:\DEV\lapx_all\tmp\lapx\benchmarks>python benchmark_batch.py | ||
| # 100 x (2000x2000) | n_threads = 24 | ||
| # 10 x (4000x4000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=164.54469568, time=0.81932855s | ||
| CPU lapx-batch-jvs : cost=164.54469568, time=0.58506370s | ||
| CPU lapx-batch-jvxa : cost=164.54469568, time=0.83581567s | ||
| CPU lapx-batch-jvsa : cost=164.54469568, time=0.59467125s | ||
| CPU lapx-batch-jvsa64 : cost=164.54469568, time=0.88178015s | ||
| CPU lapx-loop-jvx : cost=164.54469568, time=7.68291450s | ||
| CPU lapx-loop-jvs : cost=164.54469568, time=6.44884777s | ||
| CPU lapx-batch-jvx : cost=16.48859572, time=0.67588449s | ||
| CPU lapx-batch-jvs : cost=16.48859572, time=0.46411657s | ||
| CPU lapx-batch-jvxa : cost=16.48859572, time=0.71385884s | ||
| CPU lapx-batch-jvsa : cost=16.48859572, time=0.45670390s | ||
| CPU lapx-batch-jvsa64 : cost=16.48859572, time=0.70847058s | ||
| CPU lapx-loop-jvx : cost=16.48859572, time=3.95986462s | ||
| CPU lapx-loop-jvs : cost=16.48859572, time=2.66866994s | ||
| # 500 x (1000x2000) | n_threads = 24 | ||
| # 20 x (3000x2000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=291.25078928, time=0.91706204s | ||
| CPU lapx-batch-jvs : cost=291.25078928, time=0.79455686s | ||
| CPU lapx-batch-jvxa : cost=291.25078928, time=0.93096972s | ||
| CPU lapx-batch-jvsa : cost=291.25078928, time=0.79109597s | ||
| CPU lapx-batch-jvsa64 : cost=291.25078928, time=1.23274732s | ||
| CPU lapx-loop-jvx : cost=291.25078928, time=5.47222424s | ||
| CPU lapx-loop-jvs : cost=291.25078928, time=5.73832059s | ||
| CPU lapx-batch-jvx : cost=16.65042067, time=0.18923616s | ||
| CPU lapx-batch-jvs : cost=16.65042067, time=0.17624354s | ||
| CPU lapx-batch-jvxa : cost=16.65042067, time=0.18447852s | ||
| CPU lapx-batch-jvsa : cost=16.65042067, time=0.18925667s | ||
| CPU lapx-batch-jvsa64 : cost=16.65042067, time=0.18949389s | ||
| CPU lapx-loop-jvx : cost=16.65042067, time=0.85662770s | ||
| CPU lapx-loop-jvs : cost=16.65042067, time=1.05569839s | ||
| # 1000 x (1000x1000) | n_threads = 24 | ||
| # 50 x (2000x2000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=1641.72891905, time=1.18257976s | ||
| CPU lapx-batch-jvs : cost=1641.72891905, time=1.13616300s | ||
| CPU lapx-batch-jvxa : cost=1641.72891905, time=1.16668177s | ||
| CPU lapx-batch-jvsa : cost=1641.72891905, time=1.11944461s | ||
| CPU lapx-batch-jvsa64 : cost=1641.72891905, time=1.23001194s | ||
| CPU lapx-loop-jvx : cost=1641.72891905, time=13.90460992s | ||
| CPU lapx-loop-jvs : cost=1641.72891905, time=14.32015085s | ||
| CPU lapx-batch-jvx : cost=82.12386385, time=0.56725645s | ||
| CPU lapx-batch-jvs : cost=82.12386385, time=0.37664533s | ||
| CPU lapx-batch-jvxa : cost=82.12386385, time=0.57265162s | ||
| CPU lapx-batch-jvsa : cost=82.12386385, time=0.37772393s | ||
| CPU lapx-batch-jvsa64 : cost=82.12386385, time=0.61493921s | ||
| CPU lapx-loop-jvx : cost=82.12386385, time=4.46092606s | ||
| CPU lapx-loop-jvs : cost=82.12386385, time=3.49988031s | ||
| # 100 x (1000x2000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=58.19636934, time=0.18971944s | ||
| CPU lapx-batch-jvs : cost=58.19636934, time=0.16700149s | ||
| CPU lapx-batch-jvxa : cost=58.19636934, time=0.18943620s | ||
| CPU lapx-batch-jvsa : cost=58.19636934, time=0.16706610s | ||
| CPU lapx-batch-jvsa64 : cost=58.19636934, time=0.25204611s | ||
| CPU lapx-loop-jvx : cost=58.19636934, time=1.02838278s | ||
| CPU lapx-loop-jvs : cost=58.19636934, time=1.21967244s | ||
| # 500 x (1000x1000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=821.97407482, time=0.59273267s | ||
| CPU lapx-batch-jvs : cost=821.97407482, time=0.58274126s | ||
| CPU lapx-batch-jvxa : cost=821.97407482, time=0.58346224s | ||
| CPU lapx-batch-jvsa : cost=821.97407482, time=0.58098578s | ||
| CPU lapx-batch-jvsa64 : cost=821.97407482, time=0.61520362s | ||
| CPU lapx-loop-jvx : cost=821.97407482, time=6.64442897s | ||
| CPU lapx-loop-jvs : cost=821.97407482, time=7.03527546s | ||
| ``` | ||
@@ -82,3 +97,3 @@ | ||
| https://github.com/rathaROG/lapx/actions/runs/18851354956/job/53788494991 | ||
| https://github.com/rathaROG/lapx/actions/runs/18961613065/job/54149890164 | ||
@@ -89,17 +104,17 @@ ``` | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 2.13 x slower | ||
| * lapjv : ✅ Passed 🐌 2.85 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.17 x slower | ||
| * lapjvxa : ✅ Passed 🏆 1.6 x faster | ||
| * lapjvs : ✅ Passed 🐌 2.33 x slower | ||
| * lapjvsa : ✅ Passed 🐌 2.1 x slower | ||
| * lapjvc : ✅ Passed 🐌 1.64 x slower | ||
| * lapjv : ✅ Passed 🐌 5.53 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.68 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.81 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.56 x slower | ||
| * lapjvsa : ✅ Passed 🐌 3.36 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00001882s | ||
| 2. scipy ⭐ : 0.00003012s | ||
| 3. lapjvx : 0.00003522s | ||
| 4. lapjvsa : 0.00006335s | ||
| 5. lapjvc : 0.00006415s | ||
| 6. lapjvs : 0.00007016s | ||
| 7. lapjv : 0.00008579s | ||
| 1. scipy ⭐ : 0.00001024s | ||
| 2. lapjvc : 0.00001677s | ||
| 3. lapjvxa : 0.00001853s | ||
| 4. lapjvx : 0.00002740s | ||
| 5. lapjvsa : 0.00003439s | ||
| 6. lapjvs : 0.00003648s | ||
| 7. lapjv : 0.00005660s | ||
| ------------------------------- | ||
@@ -110,17 +125,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.5 x slower | ||
| * lapjv : ✅ Passed 🐌 1.9 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.3 x slower | ||
| * lapjvxa : ✅ Passed 🏆 1.15 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.92 x slower | ||
| * lapjvsa : ✅ Passed 🏆 1.79 x faster | ||
| * lapjvc : ✅ Passed 🐌 2.03 x slower | ||
| * lapjv : ✅ Passed 🐌 5.72 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.28 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.75 x slower | ||
| * lapjvs : ✅ Passed 🐌 2.7 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.04 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00000646s | ||
| 2. lapjvxa : 0.00001002s | ||
| 3. scipy ⭐ : 0.00001154s | ||
| 4. lapjvx : 0.00001498s | ||
| 5. lapjvc : 0.00001732s | ||
| 6. lapjv : 0.00002196s | ||
| 7. lapjvs : 0.00002215s | ||
| 1. scipy ⭐ : 0.00000664s | ||
| 2. lapjvsa : 0.00000690s | ||
| 3. lapjvxa : 0.00001165s | ||
| 4. lapjvc : 0.00001346s | ||
| 5. lapjvx : 0.00001517s | ||
| 6. lapjvs : 0.00001796s | ||
| 7. lapjv : 0.00003800s | ||
| ------------------------------- | ||
@@ -131,17 +146,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.82 x slower | ||
| * lapjv : ✅ Passed 🐌 3.92 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.34 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.69 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.3 x slower | ||
| * lapjvsa : ✅ Passed 🐌 5.28 x slower | ||
| * lapjvc : ✅ Passed 🐌 2.3 x slower | ||
| * lapjv : ✅ Passed 🐌 9.53 x slower | ||
| * lapjvx : ✅ Passed 🐌 3.62 x slower | ||
| * lapjvxa : ✅ Passed 🐌 3.04 x slower | ||
| * lapjvs : ✅ Passed 🐌 4.63 x slower | ||
| * lapjvsa : ✅ Passed 🐌 5.32 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. scipy ⭐ : 0.00000862s | ||
| 2. lapjvxa : 0.00001455s | ||
| 3. lapjvc : 0.00001566s | ||
| 4. lapjvx : 0.00002017s | ||
| 5. lapjvs : 0.00002845s | ||
| 6. lapjv : 0.00003373s | ||
| 7. lapjvsa : 0.00004552s | ||
| 1. scipy ⭐ : 0.00000537s | ||
| 2. lapjvc : 0.00001233s | ||
| 3. lapjvxa : 0.00001631s | ||
| 4. lapjvx : 0.00001947s | ||
| 5. lapjvs : 0.00002489s | ||
| 6. lapjvsa : 0.00002857s | ||
| 7. lapjv : 0.00005116s | ||
| ------------------------------- | ||
@@ -152,17 +167,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.87 x slower | ||
| * lapjv : ✅ Passed 🏆 1.19 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.62 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.35 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.33 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.32 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.94 x slower | ||
| * lapjv : ✅ Passed 🐌 1.34 x slower | ||
| * lapjvx : ✅ Passed 🏆 1.15 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.41 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.98 x slower | ||
| * lapjvsa : ✅ Passed 🏆 1.13 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00003084s | ||
| 2. lapjvx : 0.00004490s | ||
| 3. lapjvs : 0.00005469s | ||
| 4. lapjvsa : 0.00005475s | ||
| 5. lapjv : 0.00006076s | ||
| 6. scipy ⭐ : 0.00007253s | ||
| 7. lapjvc : 0.00013553s | ||
| 1. lapjvxa : 0.00003351s | ||
| 2. lapjvx : 0.00004110s | ||
| 3. lapjvsa : 0.00004164s | ||
| 4. scipy ⭐ : 0.00004721s | ||
| 5. lapjv : 0.00006310s | ||
| 6. lapjvc : 0.00009150s | ||
| 7. lapjvs : 0.00009357s | ||
| ------------------------------- | ||
@@ -173,17 +188,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🏆 1.19 x faster | ||
| * lapjv : ✅ Passed 🏆 2.1 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.77 x faster | ||
| * lapjvxa : ✅ Passed 🏆 4.34 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.3 x faster | ||
| * lapjvsa : ✅ Passed 🏆 5.58 x faster | ||
| * lapjvc : ✅ Passed 🏆 1.43 x faster | ||
| * lapjv : ✅ Passed 🏆 1.44 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.25 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.94 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.27 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.99 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00001290s | ||
| 2. lapjvxa : 0.00001658s | ||
| 3. lapjvx : 0.00002601s | ||
| 4. lapjvs : 0.00003129s | ||
| 5. lapjv : 0.00003426s | ||
| 6. lapjvc : 0.00006070s | ||
| 7. scipy ⭐ : 0.00007195s | ||
| 1. lapjvsa : 0.00002166s | ||
| 2. lapjvxa : 0.00002932s | ||
| 3. lapjvs : 0.00003803s | ||
| 4. lapjvx : 0.00003831s | ||
| 5. lapjv : 0.00005988s | ||
| 6. lapjvc : 0.00006028s | ||
| 7. scipy ⭐ : 0.00008634s | ||
| ------------------------------- | ||
@@ -194,17 +209,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.59 x slower | ||
| * lapjv : ✅ Passed 🏆 1.14 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.65 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.33 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.58 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.37 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.22 x slower | ||
| * lapjv : ✅ Passed 🏆 1.07 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.54 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.88 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.63 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.47 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00003199s | ||
| 2. lapjvx : 0.00004507s | ||
| 3. lapjvs : 0.00004723s | ||
| 4. lapjvsa : 0.00005446s | ||
| 5. lapjv : 0.00006505s | ||
| 6. scipy ⭐ : 0.00007443s | ||
| 7. lapjvc : 0.00011813s | ||
| 1. lapjvxa : 0.00004026s | ||
| 2. lapjvs : 0.00004654s | ||
| 3. lapjvx : 0.00004924s | ||
| 4. lapjvsa : 0.00005152s | ||
| 5. lapjv : 0.00007051s | ||
| 6. scipy ⭐ : 0.00007566s | ||
| 7. lapjvc : 0.00009201s | ||
| ------------------------------- | ||
@@ -215,17 +230,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 4.19 x slower | ||
| * lapjv : ✅ Passed 🏆 2.46 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.84 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.94 x faster | ||
| * lapjvs : ✅ Passed 🏆 4.37 x faster | ||
| * lapjvsa : ✅ Passed 🏆 4.45 x faster | ||
| * lapjvc : ✅ Passed 🐌 4.97 x slower | ||
| * lapjv : ✅ Passed 🏆 2.02 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.34 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.09 x faster | ||
| * lapjvs : ✅ Passed 🏆 3.95 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.89 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00108461s | ||
| 2. lapjvs : 0.00110318s | ||
| 3. lapjvxa : 0.00122390s | ||
| 4. lapjvx : 0.00169714s | ||
| 5. lapjv : 0.00195890s | ||
| 6. scipy ⭐ : 0.00482560s | ||
| 7. lapjvc : 0.02019986s | ||
| 1. lapjvs : 0.00116294s | ||
| 2. lapjvsa : 0.00117967s | ||
| 3. lapjvxa : 0.00148459s | ||
| 4. lapjvx : 0.00196006s | ||
| 5. lapjv : 0.00227485s | ||
| 6. scipy ⭐ : 0.00458916s | ||
| 7. lapjvc : 0.02279758s | ||
| ------------------------------- | ||
@@ -236,17 +251,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.01 x slower | ||
| * lapjv : ✅ Passed 🏆 2.0 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.05 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvc : ✅ Passed 🏆 1.46 x faster | ||
| * lapjv : ✅ Passed 🏆 1.19 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.19 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.21 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.58 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.6 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00498536s | ||
| 2. lapjvxa : 0.00499154s | ||
| 3. lapjvx : 0.00499524s | ||
| 4. lapjvs : 0.00501234s | ||
| 5. lapjv : 0.00512501s | ||
| 6. scipy ⭐ : 0.01026616s | ||
| 7. lapjvc : 0.01041151s | ||
| 1. lapjvsa : 0.00610949s | ||
| 2. lapjvs : 0.00617076s | ||
| 3. lapjvc : 0.00669765s | ||
| 4. lapjvxa : 0.00807378s | ||
| 5. lapjvx : 0.00817340s | ||
| 6. lapjv : 0.00822582s | ||
| 7. scipy ⭐ : 0.00976096s | ||
| ------------------------------- | ||
@@ -257,17 +272,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 4.17 x slower | ||
| * lapjv : ✅ Passed 🏆 3.74 x faster | ||
| * lapjvx : ✅ Passed 🏆 4.29 x faster | ||
| * lapjvxa : ✅ Passed 🏆 4.37 x faster | ||
| * lapjvs : ✅ Passed 🏆 4.33 x faster | ||
| * lapjvsa : ✅ Passed 🏆 4.38 x faster | ||
| * lapjvc : ✅ Passed 🐌 4.66 x slower | ||
| * lapjv : ✅ Passed 🏆 2.07 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.44 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.42 x faster | ||
| * lapjvs : ✅ Passed 🏆 4.27 x faster | ||
| * lapjvsa : ✅ Passed 🏆 4.37 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00136651s | ||
| 2. lapjvxa : 0.00136884s | ||
| 3. lapjvs : 0.00138280s | ||
| 4. lapjvx : 0.00139509s | ||
| 5. lapjv : 0.00160113s | ||
| 6. scipy ⭐ : 0.00598653s | ||
| 7. lapjvc : 0.02498232s | ||
| 1. lapjvsa : 0.00128856s | ||
| 2. lapjvs : 0.00131904s | ||
| 3. lapjvx : 0.00163978s | ||
| 4. lapjvxa : 0.00164817s | ||
| 5. lapjv : 0.00272247s | ||
| 6. scipy ⭐ : 0.00563737s | ||
| 7. lapjvc : 0.02625532s | ||
| ------------------------------- | ||
@@ -278,17 +293,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 222.34 x slower | ||
| * lapjv : ✅ Passed 🏆 1.15 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.31 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.29 x faster | ||
| * lapjvc : ✅ Passed 🐌 257.47 x slower | ||
| * lapjv : ✅ Passed 🏆 1.09 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.09 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.08 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.11 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.11 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.1 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvx : 0.07696005s | ||
| 2. lapjvxa : 0.07810211s | ||
| 3. lapjv : 0.08824028s | ||
| 4. scipy ⭐ : 0.10107496s | ||
| 5. lapjvsa : 0.11232857s | ||
| 6. lapjvs : 0.11252413s | ||
| 7. lapjvc : 22.47302152s | ||
| 1. lapjvx : 0.09400308s | ||
| 2. lapjv : 0.09424586s | ||
| 3. lapjvxa : 0.09509772s | ||
| 4. scipy ⭐ : 0.10258777s | ||
| 5. lapjvsa : 0.11241747s | ||
| 6. lapjvs : 0.11372150s | ||
| 7. lapjvc : 26.41295845s | ||
| ------------------------------- | ||
@@ -299,17 +314,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🏆 1.17 x faster | ||
| * lapjv : ✅ Passed 🏆 1.3 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.31 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.31 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.02 x slower | ||
| * lapjv : ✅ Passed 🐌 1.62 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.62 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.62 x slower | ||
| * lapjvs : ✅ Passed 🏆 1.76 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.76 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvs : 1.16571718s | ||
| 2. lapjvsa : 1.16708573s | ||
| 3. lapjvxa : 1.84065010s | ||
| 4. lapjvx : 1.84106529s | ||
| 5. lapjv : 1.84539000s | ||
| 6. lapjvc : 2.04553916s | ||
| 7. scipy ⭐ : 2.40261425s | ||
| 1. lapjvs : 1.34793133s | ||
| 2. lapjvsa : 1.34966543s | ||
| 3. scipy ⭐ : 2.37237136s | ||
| 4. lapjvc : 2.41397720s | ||
| 5. lapjvxa : 3.84284193s | ||
| 6. lapjvx : 3.84922083s | ||
| 7. lapjv : 3.85101395s | ||
| ------------------------------- | ||
@@ -320,17 +335,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 230.42 x slower | ||
| * lapjv : ✅ Passed 🏆 2.24 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.54 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.5 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.66 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.68 x faster | ||
| * lapjvc : ✅ Passed 🐌 273.6 x slower | ||
| * lapjv : ✅ Passed 🏆 2.03 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.04 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.05 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.59 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.63 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvx : 0.16310518s | ||
| 2. lapjvxa : 0.16545943s | ||
| 3. lapjv : 0.18474822s | ||
| 4. lapjvsa : 0.24673601s | ||
| 5. lapjvs : 0.24954140s | ||
| 6. scipy ⭐ : 0.41429755s | ||
| 7. lapjvc : 95.46102137s | ||
| 1. lapjvxa : 0.19721307s | ||
| 2. lapjvx : 0.19786040s | ||
| 3. lapjv : 0.19958704s | ||
| 4. lapjvsa : 0.24835583s | ||
| 5. lapjvs : 0.25347052s | ||
| 6. scipy ⭐ : 0.40418303s | ||
| 7. lapjvc : 110.58635478s | ||
| ------------------------------- | ||
@@ -343,3 +358,3 @@ ``` | ||
| https://github.com/rathaROG/lapx/actions/runs/18851354956/job/53788495229 | ||
| https://github.com/rathaROG/lapx/actions/runs/18961613065/job/54149890234 | ||
@@ -350,17 +365,17 @@ ``` | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.65 x slower | ||
| * lapjv : ✅ Passed 🐌 4.13 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.26 x slower | ||
| * lapjvxa : ✅ Passed 🏆 3.06 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.52 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.46 x slower | ||
| * lapjvc : ✅ Passed 🐌 1.71 x slower | ||
| * lapjv : ✅ Passed 🐌 5.14 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.32 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.57 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.57 x slower | ||
| * lapjvsa : ✅ Passed 🐌 3.25 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00001037s | ||
| 2. scipy ⭐ : 0.00003179s | ||
| 3. lapjvx : 0.00003996s | ||
| 4. lapjvsa : 0.00004642s | ||
| 5. lapjvs : 0.00004833s | ||
| 6. lapjvc : 0.00005250s | ||
| 7. lapjv : 0.00013146s | ||
| 1. scipy ⭐ : 0.00000567s | ||
| 2. lapjvxa : 0.00000888s | ||
| 3. lapjvc : 0.00000971s | ||
| 4. lapjvx : 0.00001317s | ||
| 5. lapjvsa : 0.00001842s | ||
| 6. lapjvs : 0.00002025s | ||
| 7. lapjv : 0.00002913s | ||
| ------------------------------- | ||
@@ -371,17 +386,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.51 x slower | ||
| * lapjv : ✅ Passed 🐌 1.65 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.09 x slower | ||
| * lapjvxa : ✅ Passed 🏆 1.27 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.67 x slower | ||
| * lapjvsa : ✅ Passed 🏆 1.99 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.83 x slower | ||
| * lapjv : ✅ Passed 🐌 4.14 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.83 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.59 x slower | ||
| * lapjvs : ✅ Passed 🐌 2.01 x slower | ||
| * lapjvsa : ✅ Passed 🏆 1.32 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00000308s | ||
| 2. lapjvxa : 0.00000483s | ||
| 3. scipy ⭐ : 0.00000613s | ||
| 4. lapjvx : 0.00000667s | ||
| 5. lapjvc : 0.00000925s | ||
| 6. lapjv : 0.00001013s | ||
| 7. lapjvs : 0.00001021s | ||
| 1. lapjvsa : 0.00000283s | ||
| 2. scipy ⭐ : 0.00000375s | ||
| 3. lapjvxa : 0.00000596s | ||
| 4. lapjvc : 0.00000687s | ||
| 5. lapjvx : 0.00000687s | ||
| 6. lapjvs : 0.00000754s | ||
| 7. lapjv : 0.00001554s | ||
| ------------------------------- | ||
@@ -392,17 +407,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.95 x slower | ||
| * lapjv : ✅ Passed 🐌 3.42 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.1 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.68 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.23 x slower | ||
| * lapjvsa : ✅ Passed 🐌 4.63 x slower | ||
| * lapjvc : ✅ Passed 🐌 2.66 x slower | ||
| * lapjv : ✅ Passed 🐌 5.96 x slower | ||
| * lapjvx : ✅ Passed 🐌 3.03 x slower | ||
| * lapjvxa : ✅ Passed 🐌 2.6 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.77 x slower | ||
| * lapjvsa : ✅ Passed 🐌 4.37 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. scipy ⭐ : 0.00000429s | ||
| 2. lapjvxa : 0.00000721s | ||
| 3. lapjvc : 0.00000837s | ||
| 4. lapjvx : 0.00000900s | ||
| 5. lapjvs : 0.00001387s | ||
| 6. lapjv : 0.00001467s | ||
| 7. lapjvsa : 0.00001988s | ||
| 1. scipy ⭐ : 0.00000292s | ||
| 2. lapjvxa : 0.00000758s | ||
| 3. lapjvc : 0.00000775s | ||
| 4. lapjvx : 0.00000883s | ||
| 5. lapjvs : 0.00001100s | ||
| 6. lapjvsa : 0.00001275s | ||
| 7. lapjv : 0.00001738s | ||
| ------------------------------- | ||
@@ -413,17 +428,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.49 x slower | ||
| * lapjv : ✅ Passed 🏆 1.7 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.13 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.67 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.8 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.89 x faster | ||
| * lapjvc : ✅ Passed 🐌 2.19 x slower | ||
| * lapjv : ✅ Passed 🏆 1.34 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.83 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.24 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.74 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.22 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00002146s | ||
| 2. lapjvx : 0.00002683s | ||
| 3. lapjvsa : 0.00003033s | ||
| 4. lapjvs : 0.00003183s | ||
| 5. lapjv : 0.00003358s | ||
| 6. scipy ⭐ : 0.00005721s | ||
| 7. lapjvc : 0.00008517s | ||
| 1. lapjvxa : 0.00001796s | ||
| 2. lapjvx : 0.00002200s | ||
| 3. lapjvs : 0.00002308s | ||
| 4. lapjv : 0.00002992s | ||
| 5. lapjvsa : 0.00003283s | ||
| 6. scipy ⭐ : 0.00004017s | ||
| 7. lapjvc : 0.00008783s | ||
| ------------------------------- | ||
@@ -434,17 +449,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🏆 1.41 x faster | ||
| * lapjv : ✅ Passed 🏆 3.95 x faster | ||
| * lapjvx : ✅ Passed 🏆 4.4 x faster | ||
| * lapjvxa : ✅ Passed 🏆 6.29 x faster | ||
| * lapjvs : ✅ Passed 🏆 3.59 x faster | ||
| * lapjvsa : ✅ Passed 🏆 6.65 x faster | ||
| * lapjvc : ✅ Passed 🏆 1.16 x faster | ||
| * lapjv : ✅ Passed 🏆 2.13 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.69 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.29 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.35 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.41 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00001100s | ||
| 2. lapjvxa : 0.00001162s | ||
| 3. lapjvx : 0.00001662s | ||
| 4. lapjv : 0.00001850s | ||
| 5. lapjvs : 0.00002038s | ||
| 6. lapjvc : 0.00005183s | ||
| 7. scipy ⭐ : 0.00007312s | ||
| 1. lapjvsa : 0.00002146s | ||
| 2. lapjvxa : 0.00002221s | ||
| 3. lapjvx : 0.00002721s | ||
| 4. lapjvs : 0.00003108s | ||
| 5. lapjv : 0.00003437s | ||
| 6. lapjvc : 0.00006300s | ||
| 7. scipy ⭐ : 0.00007317s | ||
| ------------------------------- | ||
@@ -455,17 +470,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 2.17 x slower | ||
| * lapjv : ✅ Passed 🏆 1.56 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.73 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.29 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.46 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.4 x faster | ||
| * lapjvc : ✅ Passed 🐌 2.31 x slower | ||
| * lapjv : ✅ Passed 🏆 1.15 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.54 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.89 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.47 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.53 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00001733s | ||
| 2. lapjvx : 0.00002292s | ||
| 3. lapjv : 0.00002546s | ||
| 4. lapjvs : 0.00002713s | ||
| 5. lapjvsa : 0.00002838s | ||
| 6. scipy ⭐ : 0.00003962s | ||
| 7. lapjvc : 0.00008579s | ||
| 1. lapjvxa : 0.00001904s | ||
| 2. lapjvx : 0.00002342s | ||
| 3. lapjvsa : 0.00002350s | ||
| 4. lapjvs : 0.00002458s | ||
| 5. lapjv : 0.00003133s | ||
| 6. scipy ⭐ : 0.00003604s | ||
| 7. lapjvc : 0.00008329s | ||
| ------------------------------- | ||
@@ -476,17 +491,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 5.08 x slower | ||
| * lapjv : ✅ Passed 🏆 1.44 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.95 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.47 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.22 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.18 x faster | ||
| * lapjvc : ✅ Passed 🐌 6.57 x slower | ||
| * lapjv : ✅ Passed 🏆 3.31 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.69 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.81 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.86 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.05 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00098750s | ||
| 2. lapjvs : 0.00154171s | ||
| 3. lapjvsa : 0.00157042s | ||
| 4. lapjvx : 0.00175387s | ||
| 5. lapjv : 0.00237538s | ||
| 6. scipy ⭐ : 0.00342258s | ||
| 7. lapjvc : 0.01738238s | ||
| 1. lapjvxa : 0.00073746s | ||
| 2. lapjvx : 0.00076150s | ||
| 3. lapjv : 0.00085017s | ||
| 4. lapjvsa : 0.00092200s | ||
| 5. lapjvs : 0.00098329s | ||
| 6. scipy ⭐ : 0.00281312s | ||
| 7. lapjvc : 0.01849563s | ||
| ------------------------------- | ||
@@ -497,17 +512,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.23 x slower | ||
| * lapjv : ✅ Passed 🏆 3.51 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.61 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.69 x faster | ||
| * lapjvs : ✅ Passed 🏆 3.29 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.42 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.37 x slower | ||
| * lapjv : ✅ Passed 🏆 1.09 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.11 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.18 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.06 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.04 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00192367s | ||
| 2. lapjvx : 0.00196654s | ||
| 3. lapjv : 0.00201971s | ||
| 4. lapjvsa : 0.00207583s | ||
| 5. lapjvs : 0.00215921s | ||
| 6. scipy ⭐ : 0.00709604s | ||
| 7. lapjvc : 0.00875521s | ||
| 1. lapjvxa : 0.00629000s | ||
| 2. lapjvx : 0.00669658s | ||
| 3. lapjv : 0.00680446s | ||
| 4. lapjvs : 0.00702367s | ||
| 5. lapjvsa : 0.00716958s | ||
| 6. scipy ⭐ : 0.00744075s | ||
| 7. lapjvc : 0.01022246s | ||
| ------------------------------- | ||
@@ -518,17 +533,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 5.09 x slower | ||
| * lapjv : ✅ Passed 🏆 3.0 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.6 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.71 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.83 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.74 x faster | ||
| * lapjvc : ✅ Passed 🐌 5.87 x slower | ||
| * lapjv : ✅ Passed 🏆 2.05 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.86 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.83 x faster | ||
| * lapjvs : ✅ Passed 🏆 3.29 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.35 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00110967s | ||
| 2. lapjvx : 0.00114104s | ||
| 3. lapjv : 0.00137046s | ||
| 4. lapjvs : 0.00145308s | ||
| 5. lapjvsa : 0.00150308s | ||
| 6. scipy ⭐ : 0.00411283s | ||
| 7. lapjvc : 0.02093913s | ||
| 1. lapjvx : 0.00102792s | ||
| 2. lapjvsa : 0.00118358s | ||
| 3. lapjvs : 0.00120550s | ||
| 4. lapjvxa : 0.00140042s | ||
| 5. lapjv : 0.00192958s | ||
| 6. scipy ⭐ : 0.00396425s | ||
| 7. lapjvc : 0.02326779s | ||
| ------------------------------- | ||
@@ -539,17 +554,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 199.7 x slower | ||
| * lapjv : ✅ Passed 🐌 1.48 x slower | ||
| * lapjvx : ✅ Passed 🏆 1.17 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.08 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.75 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.58 x slower | ||
| * lapjvc : ✅ Passed 🐌 349.67 x slower | ||
| * lapjv : ✅ Passed 🐌 3.52 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.43 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.45 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.18 x slower | ||
| * lapjvsa : ✅ Passed 🐌 2.84 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvx : 0.09995704s | ||
| 2. lapjvxa : 0.10858558s | ||
| 3. scipy ⭐ : 0.11726183s | ||
| 4. lapjv : 0.17386667s | ||
| 5. lapjvsa : 0.18564625s | ||
| 6. lapjvs : 0.20479308s | ||
| 7. lapjvc : 23.41745225s | ||
| 1. scipy ⭐ : 0.07873675s | ||
| 2. lapjvx : 0.11293429s | ||
| 3. lapjvxa : 0.11401713s | ||
| 4. lapjvsa : 0.22370100s | ||
| 5. lapjvs : 0.25039458s | ||
| 6. lapjv : 0.27737229s | ||
| 7. lapjvc : 27.53160242s | ||
| ------------------------------- | ||
@@ -560,17 +575,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 2.01 x slower | ||
| * lapjv : ✅ Passed 🏆 1.21 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.25 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.19 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.73 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.84 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.35 x slower | ||
| * lapjv : ✅ Passed 🏆 2.36 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.15 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.23 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.52 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.89 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.92814183s | ||
| 2. lapjvs : 0.98585971s | ||
| 3. lapjvx : 1.36841413s | ||
| 4. lapjv : 1.41392200s | ||
| 5. lapjvxa : 1.43138329s | ||
| 6. scipy ⭐ : 1.70584088s | ||
| 7. lapjvc : 3.43550417s | ||
| 1. lapjvsa : 0.90473763s | ||
| 2. lapjvs : 1.03581571s | ||
| 3. lapjv : 1.10758192s | ||
| 4. lapjvxa : 1.17104621s | ||
| 5. lapjvx : 1.21566396s | ||
| 6. scipy ⭐ : 2.61083117s | ||
| 7. lapjvc : 3.53453567s | ||
| ------------------------------- | ||
@@ -581,17 +596,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 283.01 x slower | ||
| * lapjv : ✅ Passed 🐌 2.24 x slower | ||
| * lapjvx : ✅ Passed 🏆 1.39 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.27 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.5 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.79 x slower | ||
| * lapjvc : ✅ Passed 🐌 308.42 x slower | ||
| * lapjv : ✅ Passed 🐌 2.1 x slower | ||
| * lapjvx : ✅ Passed 🏆 1.32 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.87 x faster | ||
| * lapjvs : ✅ Passed 🐌 2.3 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.98 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.16666625s | ||
| 2. lapjvx : 0.27308525s | ||
| 3. scipy ⭐ : 0.37829733s | ||
| 4. lapjvs : 0.56853200s | ||
| 5. lapjvsa : 0.67584400s | ||
| 6. lapjv : 0.84921917s | ||
| 7. lapjvc : 107.06137171s | ||
| 1. lapjvxa : 0.18422821s | ||
| 2. lapjvx : 0.26109171s | ||
| 3. scipy ⭐ : 0.34502992s | ||
| 4. lapjvsa : 0.68365575s | ||
| 5. lapjv : 0.72371579s | ||
| 6. lapjvs : 0.79274062s | ||
| 7. lapjvc : 106.41484879s | ||
| ------------------------------- | ||
@@ -602,3 +617,3 @@ ``` | ||
| 👁️ See newer benchmark results on all platforms [here on GitHub](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml). | ||
| 👁️ See newer benchmark results on all platforms [here on GitHub](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml). | ||
@@ -609,3 +624,3 @@ ## 🕵️♂️ Other Benchmarks | ||
| This [benchmark_tracking.py](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_tracking.py) is specifically designed for Object Tracking applications, with [SciPy](https://pypi.org/project/scipy/) as the baseline. | ||
| This [benchmark_tracking.py](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_tracking.py) is specifically designed for ***Object Tracking*** applications, with [SciPy](https://pypi.org/project/scipy/) as the baseline. | ||
@@ -616,7 +631,7 @@ ``` | ||
| git clone https://github.com/rathaROG/lapx.git | ||
| cd lapx/.github/test | ||
| cd lapx/benchmarks | ||
| python benchmark_tracking.py | ||
| ``` | ||
| As shown in the updated benchmark results below, the new function [`lapjvx()`](https://github.com/rathaROG/lapx#2-the-new-function-lapjvx) (LAPX LAPJVX in the tables) and the original [`lapjv()`](https://github.com/rathaROG/lapx#1-the-original-function-lapjv) (LAPX LAPJV in the tables) consistently match the baseline outputs of SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html), as indicated by “✓” and ✅ in the tables. | ||
| As shown in the benchmark results below, the new function [`lapjvx()`](https://github.com/rathaROG/lapx#2-the-new-function-lapjvx) (LAPX LAPJVX in the tables) and the original [`lapjv()`](https://github.com/rathaROG/lapx#1-the-original-function-lapjv) (LAPX LAPJV in the tables) consistently match the baseline outputs of SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html), as indicated by “✓” and ✅ in the tables. | ||
@@ -627,9 +642,19 @@ In most scenarios, `lapjvx()` and `lapjv()` demonstrate faster performance than the baseline SciPy's `linear_sum_assignment`, and they remain competitive with other LAPX variants such as [`lapjvc`](https://github.com/rathaROG/lapx#4-the-new-function-lapjvc) (LAPX LAPJVC in the tables). When in-function filtering with `cost_limit` is used, `lapjv()` (LAPX LAPJV-IFT in the tables) experiences a significant performance impact and can produce different outputs compared to SciPy's baseline, as indicated by “✗” and ⚠️ in the tables. | ||
| 💡 To achieve optimal performance of `lapjvx()` or `lapjv()` in object tracking applications, follow the implementation in the current [`benchmark_tracking.py`](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_tracking.py) script. | ||
| 💡 To achieve optimal performance of `lapjvx()` or `lapjv()` in object tracking applications, follow the implementation in the current [`benchmark_tracking.py`](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_tracking.py) script. | ||
| <details><summary>📊 Show the results:</summary> | ||
| <details><summary>📊 Show the results:</summary><br> | ||
| https://github.com/rathaROG/lapx/actions/runs/18830580672/job/53721233510 | ||
| Run on my local Windows 11 i9-13900KS (8 p-core + 8 e-core) + python 3.11.9 | ||
| ``` | ||
| numpy==2.2.6 | ||
| scipy==1.16.3 | ||
| lapx @ git+https://github.com/rathaROG/lapx.git@ca0bbee8e319fe005c557d5a2bcce1148d89797c | ||
| ``` | ||
| ``` | ||
| Microsoft Windows [Version 10.0.26200.7019] | ||
| (c) Microsoft Corporation. All rights reserved. | ||
| D:\DEV\lapx_all\tmp\lapx\benchmarks>python benchmark_tracking.py | ||
| ################################################################# | ||
@@ -640,13 +665,13 @@ # Benchmark with threshold (cost_limit) = 0.05 | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000325s 6th | 0.000137s ✗ 1st | 0.000168s ✓ 3rd | 0.000177s ✓ 4th | 0.000206s ✓ 5th | 0.000162s ✓ 2nd | ||
| 25x20 | 0.000170s 5th | 0.000191s ✗ 6th | 0.000155s ✓ 1st | 0.000170s ✓ 4th | 0.000162s ✓ 3rd | 0.000160s ✓ 2nd | ||
| 50x50 | 0.000265s 6th | 0.000214s ✗ 4th | 0.000190s ✓ 1st | 0.000193s ✓ 2nd | 0.000246s ✓ 5th | 0.000194s ✓ 3rd | ||
| 100x150 | 0.000453s 4th | 0.001335s ✓ 6th | 0.000402s ✓ 3rd | 0.000396s ✓ 2nd | 0.001067s ✓ 5th | 0.000330s ✓ 1st | ||
| 250x250 | 0.002854s 5th | 0.002952s ✓ 6th | 0.001731s ✓ 3rd | 0.001559s ✓ 2nd | 0.001977s ✓ 4th | 0.001536s ✓ 1st | ||
| 550x500 | 0.008365s 1st | 0.064973s ✓ 6th | 0.012927s ✓ 4th | 0.011949s ✓ 3rd | 0.030009s ✓ 5th | 0.011664s ✓ 2nd | ||
| 1000x1000 | 0.051245s 2nd | 0.111529s ✓ 6th | 0.057231s ✓ 5th | 0.055981s ✓ 4th | 0.044361s ✓ 1st | 0.055155s ✓ 3rd | ||
| 2000x2500 | 0.075957s 4th | 3.645535s ✓ 6th | 0.020558s ✓ 1st | 0.020706s ✓ 2nd | 2.975010s ✓ 5th | 0.034655s ✓ 3rd | ||
| 5000x5000 | 2.114572s 5th | 2.563577s ✓ 6th | 1.214149s ✓ 2nd | 1.219787s ✓ 3rd | 1.844269s ✓ 4th | 0.959447s ✓ 1st | ||
| 10x10 | 0.000063s 4th | 0.000057s ✓ 2nd | 0.000063s ✓ 5th | 0.000069s ✓ 6th | 0.000061s ✓ 3rd | 0.000057s ✓ 1st | ||
| 25x20 | 0.000058s 4th | 0.000104s ✗ 6th | 0.000062s ✓ 5th | 0.000052s ✓ 2nd | 0.000058s ✓ 3rd | 0.000051s ✓ 1st | ||
| 50x50 | 0.000083s 4th | 0.000086s ✗ 5th | 0.000068s ✓ 3rd | 0.000058s ✓ 1st | 0.000103s ✓ 6th | 0.000063s ✓ 2nd | ||
| 100x150 | 0.000131s 2nd | 0.000828s ✗ 6th | 0.000132s ✓ 3rd | 0.000144s ✓ 4th | 0.000680s ✓ 5th | 0.000123s ✓ 1st | ||
| 250x250 | 0.001126s 4th | 0.001218s ✓ 5th | 0.000557s ✓ 2nd | 0.000537s ✓ 1st | 0.001516s ✓ 6th | 0.000605s ✓ 3rd | ||
| 550x500 | 0.003531s 4th | 0.011714s ✓ 5th | 0.001424s ✓ 2nd | 0.001358s ✓ 1st | 0.017545s ✓ 6th | 0.001511s ✓ 3rd | ||
| 1000x1000 | 0.022934s 4th | 0.026359s ✓ 5th | 0.010415s ✓ 2nd | 0.010320s ✓ 1st | 0.031669s ✓ 6th | 0.012068s ✓ 3rd | ||
| 2000x2500 | 0.034198s 4th | 1.627013s ✓ 6th | 0.013647s ✓ 1st | 0.015660s ✓ 2nd | 1.531048s ✓ 5th | 0.022275s ✓ 3rd | ||
| 5000x5000 | 1.095034s 3rd | 2.335637s ✓ 6th | 1.082954s ✓ 2nd | 1.103870s ✓ 4th | 1.140890s ✓ 5th | 0.496765s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -656,10 +681,10 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1063.3028 ms | ✅ | 🥇x3 🥈x3 🥉x3 | ||
| 2. LAPX LAPJV : 1307.5116 ms | ✅ | 🥇x3 🥈x1 🥉x3 🚩x1 🏳️x1 | ||
| 3. LAPX LAPJVX : 1310.9174 ms | ✅ | 🥈x4 🥉x2 🚩x3 | ||
| 4. BASELINE SciPy : 2254.2068 ms | ⭐ | 🥇x1 🥈x1 🚩x2 🏳️x3 🥴x2 | ||
| 5. LAPX LAPJVC : 4897.3061 ms | ✅ | 🥇x1 🥉x1 🚩x2 🏳️x5 | ||
| 6. LAPX LAPJV-IFT : 6390.4416 ms | ⚠️ | 🥇x1 🚩x1 🥴x7 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 533.5186 ms | ✅ | 🥇x4 🥈x1 🥉x4 | ||
| 2. LAPX LAPJV : 1109.3228 ms | ✅ | 🥇x1 🥈x4 🥉x2 🏳️x2 | ||
| 3. LAPX LAPJVX : 1132.0674 ms | ✅ | 🥇x4 🥈x2 🚩x2 🥴x1 | ||
| 4. BASELINE SciPy : 1157.1577 ms | ⭐ | 🥈x1 🥉x1 🚩x7 | ||
| 5. LAPX LAPJVC : 2723.5708 ms | ✅ | 🥉x2 🏳️x3 🥴x4 | ||
| 6. LAPX LAPJV-IFT : 4003.0145 ms | ⚠️ | 🥈x1 🏳️x4 🥴x4 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
@@ -672,13 +697,13 @@ | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000188s 6th | 0.000157s ✗ 5th | 0.000130s ✓ 3rd | 0.000129s ✓ 1st | 0.000139s ✓ 4th | 0.000129s ✓ 2nd | ||
| 25x20 | 0.000152s 3rd | 0.000171s ✗ 6th | 0.000148s ✓ 1st | 0.000149s ✓ 2nd | 0.000159s ✓ 4th | 0.000160s ✓ 5th | ||
| 50x50 | 0.000245s 6th | 0.000230s ✗ 4th | 0.000188s ✓ 1st | 0.000194s ✓ 2nd | 0.000231s ✓ 5th | 0.000197s ✓ 3rd | ||
| 100x150 | 0.000417s 4th | 0.001254s ✓ 6th | 0.000334s ✓ 2nd | 0.000333s ✓ 1st | 0.000887s ✓ 5th | 0.000349s ✓ 3rd | ||
| 250x250 | 0.002642s 5th | 0.003365s ✓ 6th | 0.001734s ✓ 2nd | 0.001751s ✓ 3rd | 0.002294s ✓ 4th | 0.001708s ✓ 1st | ||
| 550x500 | 0.007055s 1st | 0.127557s ✓ 6th | 0.011708s ✓ 4th | 0.011700s ✓ 3rd | 0.040566s ✓ 5th | 0.011671s ✓ 2nd | ||
| 1000x1000 | 0.045616s 5th | 0.085374s ✓ 6th | 0.041577s ✓ 3rd | 0.041732s ✓ 4th | 0.040828s ✓ 1st | 0.041053s ✓ 2nd | ||
| 2000x2500 | 0.075874s 4th | 3.594363s ✓ 6th | 0.020592s ✓ 2nd | 0.020426s ✓ 1st | 2.840181s ✓ 5th | 0.024470s ✓ 3rd | ||
| 5000x5000 | 2.493812s 5th | 3.415118s ✓ 6th | 1.651438s ✓ 3rd | 1.646662s ✓ 2nd | 2.010495s ✓ 4th | 0.994217s ✓ 1st | ||
| 10x10 | 0.000048s 2nd | 0.000055s ✗ 5th | 0.000048s ✓ 3rd | 0.000039s ✓ 1st | 0.000054s ✓ 4th | 0.000055s ✓ 6th | ||
| 25x20 | 0.000048s 3rd | 0.000058s ✗ 6th | 0.000055s ✓ 4th | 0.000047s ✓ 2nd | 0.000055s ✓ 5th | 0.000047s ✓ 1st | ||
| 50x50 | 0.000077s 4th | 0.000080s ✗ 6th | 0.000057s ✓ 3rd | 0.000048s ✓ 1st | 0.000078s ✓ 5th | 0.000051s ✓ 2nd | ||
| 100x150 | 0.000112s 3rd | 0.000635s ✓ 6th | 0.000123s ✓ 4th | 0.000092s ✓ 1st | 0.000588s ✓ 5th | 0.000093s ✓ 2nd | ||
| 250x250 | 0.000991s 4th | 0.001352s ✓ 6th | 0.000536s ✓ 1st | 0.000536s ✓ 2nd | 0.001200s ✓ 5th | 0.000591s ✓ 3rd | ||
| 550x500 | 0.003480s 4th | 0.010844s ✓ 5th | 0.001426s ✓ 2nd | 0.001311s ✓ 1st | 0.016003s ✓ 6th | 0.001447s ✓ 3rd | ||
| 1000x1000 | 0.023240s 4th | 0.026984s ✓ 5th | 0.009923s ✓ 2nd | 0.009682s ✓ 1st | 0.027498s ✓ 6th | 0.011329s ✓ 3rd | ||
| 2000x2500 | 0.034578s 4th | 1.563681s ✓ 5th | 0.014135s ✓ 2nd | 0.014121s ✓ 1st | 1.596397s ✓ 6th | 0.022706s ✓ 3rd | ||
| 5000x5000 | 1.070328s 2nd | 3.315799s ✓ 6th | 1.622128s ✓ 4th | 1.628149s ✓ 5th | 1.100956s ✓ 3rd | 0.537018s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -688,10 +713,10 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1073.9525 ms | ✅ | 🥇x2 🥈x3 🥉x3 🏳️x1 | ||
| 2. LAPX LAPJVX : 1723.0752 ms | ✅ | 🥇x3 🥈x3 🥉x2 🚩x1 | ||
| 3. LAPX LAPJV : 1727.8499 ms | ✅ | 🥇x2 🥈x3 🥉x3 🚩x1 | ||
| 4. BASELINE SciPy : 2626.0008 ms | ⭐ | 🥇x1 🥉x1 🚩x2 🏳️x3 🥴x2 | ||
| 5. LAPX LAPJVC : 4935.7815 ms | ✅ | 🥇x1 🚩x4 🏳️x4 | ||
| 6. LAPX LAPJV-IFT : 7227.5912 ms | ⚠️ | 🚩x1 🏳️x1 🥴x7 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 573.3374 ms | ✅ | 🥇x2 🥈x2 🥉x4 🥴x1 | ||
| 2. BASELINE SciPy : 1132.9018 ms | ⭐ | 🥈x2 🥉x2 🚩x5 | ||
| 3. LAPX LAPJV : 1648.4320 ms | ✅ | 🥇x1 🥈x3 🥉x2 🚩x3 | ||
| 4. LAPX LAPJVX : 1654.0251 ms | ✅ | 🥇x6 🥈x2 🏳️x1 | ||
| 5. LAPX LAPJVC : 2742.8296 ms | ✅ | 🥉x1 🚩x1 🏳️x4 🥴x3 | ||
| 6. LAPX LAPJV-IFT : 4919.4888 ms | ⚠️ | 🏳️x4 🥴x5 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
@@ -704,13 +729,13 @@ | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000218s 6th | 0.000129s ✓ 1st | 0.000131s ✓ 3rd | 0.000154s ✓ 5th | 0.000139s ✓ 4th | 0.000131s ✓ 2nd | ||
| 25x20 | 0.000150s 1st | 0.000178s ✓ 6th | 0.000151s ✓ 2nd | 0.000158s ✓ 4th | 0.000163s ✓ 5th | 0.000155s ✓ 3rd | ||
| 50x50 | 0.000211s 5th | 0.000194s ✓ 3rd | 0.000283s ✓ 6th | 0.000180s ✓ 1st | 0.000194s ✓ 2nd | 0.000197s ✓ 4th | ||
| 100x150 | 0.000404s 4th | 0.001266s ✓ 6th | 0.000344s ✓ 3rd | 0.000318s ✓ 1st | 0.000955s ✓ 5th | 0.000341s ✓ 2nd | ||
| 250x250 | 0.002778s 6th | 0.002573s ✓ 5th | 0.001292s ✓ 2nd | 0.001324s ✓ 3rd | 0.001683s ✓ 4th | 0.001267s ✓ 1st | ||
| 550x500 | 0.007734s 1st | 0.238647s ✓ 6th | 0.012039s ✓ 4th | 0.011927s ✓ 3rd | 0.040219s ✓ 5th | 0.011922s ✓ 2nd | ||
| 1000x1000 | 0.046291s 5th | 0.075969s ✓ 6th | 0.036951s ✓ 2nd | 0.037461s ✓ 3rd | 0.039884s ✓ 4th | 0.020911s ✓ 1st | ||
| 2000x2500 | 0.076470s 4th | 3.556511s ✓ 6th | 0.020127s ✓ 1st | 0.020713s ✓ 2nd | 2.866433s ✓ 5th | 0.023518s ✓ 3rd | ||
| 5000x5000 | 2.853023s 5th | 2.870481s ✓ 6th | 1.372504s ✓ 3rd | 1.367633s ✓ 2nd | 1.949158s ✓ 4th | 1.205538s ✓ 1st | ||
| 10x10 | 0.000051s 6th | 0.000045s ✓ 5th | 0.000045s ✓ 4th | 0.000040s ✓ 2nd | 0.000043s ✓ 3rd | 0.000039s ✓ 1st | ||
| 25x20 | 0.000043s 1st | 0.000055s ✓ 6th | 0.000054s ✓ 4th | 0.000046s ✓ 2nd | 0.000054s ✓ 5th | 0.000046s ✓ 3rd | ||
| 50x50 | 0.000070s 4th | 0.000076s ✓ 5th | 0.000060s ✓ 3rd | 0.000049s ✓ 1st | 0.000089s ✓ 6th | 0.000054s ✓ 2nd | ||
| 100x150 | 0.000113s 4th | 0.000646s ✓ 6th | 0.000103s ✓ 3rd | 0.000095s ✓ 2nd | 0.000616s ✓ 5th | 0.000095s ✓ 1st | ||
| 250x250 | 0.001064s 4th | 0.001522s ✓ 6th | 0.000643s ✓ 2nd | 0.000591s ✓ 1st | 0.001448s ✓ 5th | 0.000673s ✓ 3rd | ||
| 550x500 | 0.003672s 4th | 0.010797s ✓ 5th | 0.001429s ✓ 2nd | 0.001405s ✓ 1st | 0.015196s ✓ 6th | 0.001497s ✓ 3rd | ||
| 1000x1000 | 0.019571s 4th | 0.027457s ✓ 6th | 0.010368s ✓ 1st | 0.011375s ✓ 3rd | 0.024061s ✓ 5th | 0.010495s ✓ 2nd | ||
| 2000x2500 | 0.038530s 4th | 1.654156s ✓ 6th | 0.015500s ✓ 2nd | 0.014464s ✓ 1st | 1.561805s ✓ 5th | 0.022967s ✓ 3rd | ||
| 5000x5000 | 0.969325s 5th | 1.507703s ✓ 6th | 0.668259s ✓ 3rd | 0.656468s ✓ 2nd | 0.954102s ✓ 4th | 0.475278s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -720,10 +745,10 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1263.9814 ms | ✅ | 🥇x3 🥈x3 🥉x2 🚩x1 | ||
| 2. LAPX LAPJVX : 1439.8675 ms | ✅ | 🥇x2 🥈x2 🥉x3 🚩x1 🏳️x1 | ||
| 3. LAPX LAPJV : 1443.8227 ms | ✅ | 🥇x1 🥈x3 🥉x3 🚩x1 🥴x1 | ||
| 4. BASELINE SciPy : 2987.2792 ms | ⭐ | 🥇x2 🚩x2 🏳️x3 🥴x2 | ||
| 5. LAPX LAPJVC : 4898.8268 ms | ✅ | 🥈x1 🚩x4 🏳️x4 | ||
| 6. LAPX LAPJV-IFT : 6745.9482 ms | ✅ | 🥇x1 🥉x1 🏳️x1 🥴x6 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 511.1457 ms | ✅ | 🥇x3 🥈x2 🥉x4 | ||
| 2. LAPX LAPJVX : 684.5318 ms | ✅ | 🥇x4 🥈x4 🥉x1 | ||
| 3. LAPX LAPJV : 696.4601 ms | ✅ | 🥇x1 🥈x3 🥉x3 🚩x2 | ||
| 4. BASELINE SciPy : 1032.4388 ms | ⭐ | 🥇x1 🚩x6 🏳️x1 🥴x1 | ||
| 5. LAPX LAPJVC : 2557.4136 ms | ✅ | 🥉x1 🚩x1 🏳️x5 🥴x2 | ||
| 6. LAPX LAPJV-IFT : 3202.4579 ms | ✅ | 🏳️x3 🥴x6 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
@@ -736,13 +761,13 @@ | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000233s 6th | 0.000127s ✓ 1st | 0.000158s ✓ 5th | 0.000128s ✓ 2nd | 0.000142s ✓ 4th | 0.000129s ✓ 3rd | ||
| 25x20 | 0.000144s 1st | 0.000165s ✓ 5th | 0.000172s ✓ 6th | 0.000159s ✓ 4th | 0.000155s ✓ 3rd | 0.000148s ✓ 2nd | ||
| 50x50 | 0.000269s 6th | 0.000203s ✓ 3rd | 0.000184s ✓ 1st | 0.000191s ✓ 2nd | 0.000223s ✓ 4th | 0.000254s ✓ 5th | ||
| 100x150 | 0.000417s 4th | 0.001233s ✓ 6th | 0.000308s ✓ 1st | 0.000356s ✓ 3rd | 0.001072s ✓ 5th | 0.000326s ✓ 2nd | ||
| 250x250 | 0.002866s 5th | 0.003220s ✓ 6th | 0.001664s ✓ 1st | 0.001702s ✓ 3rd | 0.002252s ✓ 4th | 0.001676s ✓ 2nd | ||
| 550x500 | 0.008314s 1st | 0.249585s ✓ 6th | 0.011429s ✓ 2nd | 0.011442s ✓ 3rd | 0.030332s ✓ 5th | 0.011470s ✓ 4th | ||
| 1000x1000 | 0.046902s 5th | 0.080581s ✓ 6th | 0.039630s ✓ 2nd | 0.039960s ✓ 3rd | 0.045573s ✓ 4th | 0.038703s ✓ 1st | ||
| 2000x2500 | 0.074322s 4th | 3.490300s ✓ 6th | 0.020954s ✓ 1st | 0.021199s ✓ 2nd | 2.761126s ✓ 5th | 0.024095s ✓ 3rd | ||
| 5000x5000 | 2.614220s 5th | 4.553976s ✓ 6th | 2.238387s ✓ 3rd | 2.240301s ✓ 4th | 1.912648s ✓ 2nd | 1.146587s ✓ 1st | ||
| 10x10 | 0.000049s 6th | 0.000046s ✓ 4th | 0.000046s ✓ 5th | 0.000038s ✓ 1st | 0.000044s ✓ 3rd | 0.000041s ✓ 2nd | ||
| 25x20 | 0.000040s 1st | 0.000055s ✓ 6th | 0.000052s ✓ 5th | 0.000043s ✓ 2nd | 0.000051s ✓ 4th | 0.000045s ✓ 3rd | ||
| 50x50 | 0.000067s 4th | 0.000074s ✓ 5th | 0.000058s ✓ 3rd | 0.000053s ✓ 2nd | 0.000081s ✓ 6th | 0.000053s ✓ 1st | ||
| 100x150 | 0.000117s 2nd | 0.000752s ✓ 6th | 0.000123s ✓ 3rd | 0.000126s ✓ 4th | 0.000721s ✓ 5th | 0.000098s ✓ 1st | ||
| 250x250 | 0.001063s 4th | 0.001545s ✓ 6th | 0.000447s ✓ 2nd | 0.000445s ✓ 1st | 0.001303s ✓ 5th | 0.000477s ✓ 3rd | ||
| 550x500 | 0.003711s 4th | 0.011309s ✓ 5th | 0.001524s ✓ 2nd | 0.001460s ✓ 1st | 0.016480s ✓ 6th | 0.001558s ✓ 3rd | ||
| 1000x1000 | 0.019167s 1st | 0.053561s ✓ 6th | 0.025616s ✓ 3rd | 0.025778s ✓ 4th | 0.023447s ✓ 2nd | 0.027353s ✓ 5th | ||
| 2000x2500 | 0.035676s 4th | 1.579856s ✓ 5th | 0.014502s ✓ 2nd | 0.014438s ✓ 1st | 1.699035s ✓ 6th | 0.023144s ✓ 3rd | ||
| 5000x5000 | 1.214213s 5th | 1.230595s ✓ 6th | 0.511229s ✓ 2nd | 0.514490s ✓ 3rd | 1.144982s ✓ 4th | 0.452692s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -752,10 +777,10 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1223.3865 ms | ✅ | 🥇x2 🥈x3 🥉x2 🚩x1 🏳️x1 | ||
| 2. LAPX LAPJV : 2312.8874 ms | ✅ | 🥇x4 🥈x2 🥉x1 🏳️x1 🥴x1 | ||
| 3. LAPX LAPJVX : 2315.4386 ms | ✅ | 🥈x3 🥉x4 🚩x2 | ||
| 4. BASELINE SciPy : 2747.6856 ms | ⭐ | 🥇x2 🚩x2 🏳️x3 🥴x2 | ||
| 5. LAPX LAPJVC : 4753.5226 ms | ✅ | 🥈x1 🥉x1 🚩x4 🏳️x3 | ||
| 6. LAPX LAPJV-IFT : 8379.3885 ms | ✅ | 🥇x1 🥉x1 🏳️x1 🥴x6 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 505.4603 ms | ✅ | 🥇x3 🥈x1 🥉x4 🏳️x1 | ||
| 2. LAPX LAPJV : 553.5970 ms | ✅ | 🥈x4 🥉x3 🏳️x2 | ||
| 3. LAPX LAPJVX : 556.8710 ms | ✅ | 🥇x4 🥈x2 🥉x1 🚩x2 | ||
| 4. BASELINE SciPy : 1274.1026 ms | ⭐ | 🥇x2 🥈x1 🚩x4 🏳️x1 🥴x1 | ||
| 5. LAPX LAPJV-IFT : 2877.7913 ms | ✅ | 🚩x1 🏳️x3 🥴x5 | ||
| 6. LAPX LAPJVC : 2886.1434 ms | ✅ | 🥈x1 🥉x1 🚩x2 🏳️x2 🥴x3 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
@@ -768,13 +793,13 @@ | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000210s 6th | 0.000136s ✓ 4th | 0.000133s ✓ 3rd | 0.000127s ✓ 1st | 0.000142s ✓ 5th | 0.000131s ✓ 2nd | ||
| 25x20 | 0.000150s 3rd | 0.000203s ✓ 6th | 0.000145s ✓ 1st | 0.000148s ✓ 2nd | 0.000152s ✓ 4th | 0.000178s ✓ 5th | ||
| 50x50 | 0.000243s 6th | 0.000239s ✓ 5th | 0.000194s ✓ 1st | 0.000201s ✓ 2nd | 0.000233s ✓ 4th | 0.000203s ✓ 3rd | ||
| 100x150 | 0.000426s 4th | 0.001229s ✓ 6th | 0.000335s ✓ 3rd | 0.000329s ✓ 2nd | 0.001090s ✓ 5th | 0.000311s ✓ 1st | ||
| 250x250 | 0.002309s 6th | 0.002270s ✓ 5th | 0.001100s ✓ 1st | 0.001198s ✓ 3rd | 0.001776s ✓ 4th | 0.001157s ✓ 2nd | ||
| 550x500 | 0.007958s 1st | 0.236500s ✓ 6th | 0.012768s ✓ 3rd | 0.012651s ✓ 2nd | 0.039369s ✓ 5th | 0.012789s ✓ 4th | ||
| 1000x1000 | 0.047147s 5th | 0.089055s ✓ 6th | 0.044446s ✓ 4th | 0.044309s ✓ 3rd | 0.043942s ✓ 2nd | 0.043357s ✓ 1st | ||
| 2000x2500 | 0.078020s 4th | 3.430561s ✓ 6th | 0.021284s ✓ 1st | 0.021622s ✓ 2nd | 2.844436s ✓ 5th | 0.024936s ✓ 3rd | ||
| 5000x5000 | 2.490763s 5th | 4.480608s ✓ 6th | 2.195783s ✓ 3rd | 2.198696s ✓ 4th | 2.009281s ✓ 2nd | 1.052344s ✓ 1st | ||
| 10x10 | 0.000055s 6th | 0.000048s ✓ 5th | 0.000046s ✓ 4th | 0.000037s ✓ 1st | 0.000043s ✓ 3rd | 0.000040s ✓ 2nd | ||
| 25x20 | 0.000043s 1st | 0.000059s ✓ 6th | 0.000053s ✓ 4th | 0.000045s ✓ 2nd | 0.000055s ✓ 5th | 0.000046s ✓ 3rd | ||
| 50x50 | 0.000074s 4th | 0.000080s ✓ 5th | 0.000063s ✓ 3rd | 0.000054s ✓ 1st | 0.000088s ✓ 6th | 0.000058s ✓ 2nd | ||
| 100x150 | 0.000146s 4th | 0.000647s ✓ 5th | 0.000107s ✓ 3rd | 0.000095s ✓ 1st | 0.000714s ✓ 6th | 0.000103s ✓ 2nd | ||
| 250x250 | 0.000964s 4th | 0.001495s ✓ 6th | 0.000565s ✓ 1st | 0.000603s ✓ 2nd | 0.001220s ✓ 5th | 0.000636s ✓ 3rd | ||
| 550x500 | 0.003138s 4th | 0.010879s ✓ 5th | 0.001294s ✓ 1st | 0.001329s ✓ 2nd | 0.016092s ✓ 6th | 0.001405s ✓ 3rd | ||
| 1000x1000 | 0.020857s 3rd | 0.042133s ✓ 6th | 0.019502s ✓ 2nd | 0.019448s ✓ 1st | 0.023370s ✓ 5th | 0.021119s ✓ 4th | ||
| 2000x2500 | 0.032293s 4th | 1.575432s ✓ 6th | 0.014037s ✓ 1st | 0.014037s ✓ 2nd | 1.482075s ✓ 5th | 0.022823s ✓ 3rd | ||
| 5000x5000 | 0.974974s 4th | 1.340142s ✓ 6th | 0.564158s ✓ 2nd | 0.570803s ✓ 3rd | 1.116583s ✓ 5th | 0.442339s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -784,14 +809,14 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1135.4053 ms | ✅ | 🥇x3 🥈x2 🥉x2 🚩x1 🏳️x1 | ||
| 2. LAPX LAPJV : 2276.1874 ms | ✅ | 🥇x4 🥉x4 🚩x1 | ||
| 3. LAPX LAPJVX : 2279.2815 ms | ✅ | 🥇x1 🥈x5 🥉x2 🚩x1 | ||
| 4. BASELINE SciPy : 2627.2261 ms | ⭐ | 🥇x1 🥉x1 🚩x2 🏳️x2 🥴x3 | ||
| 5. LAPX LAPJVC : 4940.4219 ms | ✅ | 🥈x2 🚩x3 🏳️x4 | ||
| 6. LAPX LAPJV-IFT : 8240.8002 ms | ✅ | 🚩x1 🏳️x2 🥴x6 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 488.5671 ms | ✅ | 🥇x1 🥈x3 🥉x4 🚩x1 | ||
| 2. LAPX LAPJV : 599.8239 ms | ✅ | 🥇x3 🥈x2 🥉x2 🚩x2 | ||
| 3. LAPX LAPJVX : 606.4511 ms | ✅ | 🥇x4 🥈x4 🥉x1 | ||
| 4. BASELINE SciPy : 1032.5424 ms | ⭐ | 🥇x1 🥉x1 🚩x6 🥴x1 | ||
| 5. LAPX LAPJVC : 2640.2397 ms | ✅ | 🥉x1 🏳️x5 🥴x3 | ||
| 6. LAPX LAPJV-IFT : 2970.9133 ms | ✅ | 🏳️x4 🥴x5 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| ``` | ||
| 👁️ See more results on various platforms and architectures [here](https://github.com/rathaROG/lapx/actions/runs/18830580672). | ||
| 👁️ See more results on various platforms and architectures [here](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml). | ||
| </details> |
+57
-18
@@ -59,25 +59,64 @@ # Copyright (c) 2025 Ratha SIV | MIT License | ||
| __version__ = '0.8.1' | ||
| from typing import TYPE_CHECKING | ||
| import importlib | ||
| # Single-matrix solvers | ||
| from .lapmod import lapmod | ||
| from ._lapjv import ( | ||
| lapjv, | ||
| LARGE_ as LARGE, | ||
| FP_1_ as FP_1, | ||
| FP_2_ as FP_2, | ||
| FP_DYNAMIC_ as FP_DYNAMIC | ||
| ) | ||
| from ._lapjvx import lapjvx, lapjvxa | ||
| from ._lapjvc import lapjvc | ||
| from .lapjvs import lapjvs, lapjvsa | ||
| if TYPE_CHECKING: | ||
| # Single-matrix solvers | ||
| from ._lapmod_wp import lapmod | ||
| from ._lapjv_wp import lapjv | ||
| from ._lapjvx_wp import lapjvx, lapjvxa | ||
| from ._lapjvc_wp import lapjvc | ||
| from ._lapjvs_wp import lapjvs, lapjvsa | ||
| # Batch solvers | ||
| from ._lapjvx_batch_wp import lapjvx_batch, lapjvxa_batch | ||
| from ._lapjvs_batch_wp import lapjvs_batch, lapjvsa_batch | ||
| # Constants | ||
| from ._lapjv import ( # type: ignore | ||
| LARGE_ as LARGE, | ||
| FP_1_ as FP_1, | ||
| FP_2_ as FP_2, | ||
| FP_DYNAMIC_ as FP_DYNAMIC, | ||
| ) | ||
| # Batch solvers | ||
| from .lapjvx_batch import lapjvx_batch, lapjvxa_batch | ||
| from .lapjvs_batch import lapjvs_batch, lapjvsa_batch | ||
| _exports = { | ||
| # Single-matrix solvers | ||
| 'lapmod': ("lap._lapmod_wp", "lapmod"), | ||
| 'lapjv': ("lap._lapjv_wp", "lapjv"), | ||
| 'lapjvx': ("lap._lapjvx_wp", "lapjvx"), | ||
| 'lapjvxa': ("lap._lapjvx_wp", "lapjvxa"), | ||
| 'lapjvc': ("lap._lapjvc_wp", "lapjvc"), | ||
| 'lapjvs': ("lap._lapjvs_wp", "lapjvs"), | ||
| 'lapjvsa': ("lap._lapjvs_wp", "lapjvsa"), | ||
| # Batch solvers | ||
| 'lapjvx_batch': ("lap._lapjvx_batch_wp", "lapjvx_batch"), | ||
| 'lapjvxa_batch': ("lap._lapjvx_batch_wp", "lapjvxa_batch"), | ||
| 'lapjvs_batch': ("lap._lapjvs_batch_wp", "lapjvs_batch"), | ||
| 'lapjvsa_batch': ("lap._lapjvs_batch_wp", "lapjvsa_batch"), | ||
| # Constants | ||
| 'LARGE': ("lap._lapjv", "LARGE_"), | ||
| 'FP_1': ("lap._lapjv", "FP_1_"), | ||
| 'FP_2': ("lap._lapjv", "FP_2_"), | ||
| 'FP_DYNAMIC': ("lap._lapjv", "FP_DYNAMIC_"), | ||
| } | ||
| def __getattr__(name): | ||
| if name in _exports: | ||
| mod_path, attr = _exports[name] | ||
| mod = importlib.import_module(mod_path) | ||
| obj = getattr(mod, attr) | ||
| globals()[name] = obj | ||
| return obj | ||
| raise AttributeError(f"LAPX could not find attribute '{name}'.") | ||
| __version__ = '0.9.0' | ||
| __author__ = 'Ratha SIV' | ||
| __description__ = 'Linear assignment problem solvers, including single and batch solvers.' | ||
| __homepage__ = 'https://github.com/rathaROG/lapx' | ||
| __all__ = [ | ||
| 'lapjv', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs', 'lapjvsa', | ||
| # Single-matrix solvers | ||
| 'lapmod', 'lapjv', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs', 'lapjvsa', | ||
| # Batch solvers | ||
| 'lapjvx_batch', 'lapjvxa_batch', 'lapjvs_batch', 'lapjvsa_batch', | ||
| 'lapmod', 'FP_1', 'FP_2', 'FP_DYNAMIC', 'LARGE' | ||
| # Constants | ||
| 'FP_1', 'FP_2', 'FP_DYNAMIC', 'LARGE', | ||
| ] |
+41
-48
| Metadata-Version: 2.4 | ||
| Name: lapx | ||
| Version: 0.8.1 | ||
| Version: 0.9.0 | ||
| Summary: Linear assignment problem solvers, including single and batch solvers. | ||
@@ -52,9 +52,10 @@ Home-page: https://github.com/rathaROG/lapx | ||
| <details><summary>🆕 What's new</summary><br> | ||
| <details><summary>🆕 What's new</summary><hr> | ||
| - 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. | ||
| - 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. | ||
| - 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. | ||
| - 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. | ||
| - Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). | ||
| <sup>- 2025/10/31: [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) delivered a major stability and performance upgrade across most solvers. 🚀 </sup><br> | ||
| <sup>- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. </sup><br> | ||
| <sup>- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. </sup><br> | ||
| <sup>- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. </sup><br> | ||
| <sup>- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. </sup><br> | ||
| <sup>- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). </sup><br> | ||
@@ -65,17 +66,23 @@ </details> | ||
| <div align="center"> | ||
| [](https://github.com/rathaROG/lapx/releases) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml) | ||
| [](https://pypi.org/project/lapx/#files) | ||
| [](https://pypi.org/project/lapx/) | ||
| # Linear Assignment Problem Solvers | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml) | ||
| [`lapx`](https://github.com/rathaROG/lapx) supports all input cost types — Single ✓ Batch ✓ Square ✓ Rectangular ✓ . | ||
| # Linear Assignment Problem Solvers · 𝕏 | ||
| `lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| **Single ✓ Batch ✓ Square ✓ Rectangular ✓** | ||
| </div> | ||
| [`lapx`](https://github.com/rathaROG/lapx) was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| <details><summary>Click to read more ...</summary><br> | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³. | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation ³ provided by A. Volgenant. | ||
@@ -93,3 +100,3 @@ <sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br> | ||
| [](https://pypi.org/project/lapx/) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://pepy.tech/project/lapx) | ||
@@ -102,15 +109,4 @@ [](https://pepy.tech/project/lapx) | ||
| <details><summary>🛞 Pre-built wheel support</summary> | ||
| *The pre-built wheels cover most platforms and architectures, see [details](https://pypi.org/project/lapx/#files).* | ||
| | Python | **Windows** ✅ | **Linux** ✅ | **macOS** ✅ | | ||
| |:---:|:---:|:---:|:---:| | ||
| | 3.7 | AMD64 | x86_64/aarch64 | x86_64 | | ||
| | 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 | | ||
| | 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 | | ||
| <sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br> | ||
| <sup>² ⚠️ Windows ARM64 is experimental. </sup><br> | ||
| </details> | ||
| <details><summary>🛠️ Other installation options</summary> | ||
@@ -139,2 +135,4 @@ | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml) | ||
| ### 🅰️ Single-matrix Solvers 📄 | ||
@@ -196,3 +194,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -226,3 +225,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -243,3 +243,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -268,3 +269,3 @@ | ||
| For see the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details. | ||
| See the [`lap/_lapmod_wp.py`](https://github.com/rathaROG/lapx/blob/main/lap/_lapmod_wp.py) for details. | ||
@@ -302,7 +303,4 @@ ```python | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -323,4 +321,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -342,7 +339,4 @@ | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -365,4 +359,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -378,3 +371,3 @@ | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) |
@@ -9,6 +9,9 @@ LICENSE | ||
| lap/__init__.py | ||
| lap/lapjvs.py | ||
| lap/lapjvs_batch.py | ||
| lap/lapjvx_batch.py | ||
| lap/lapmod.py | ||
| lap/_lapjv_wp.py | ||
| lap/_lapjvc_wp.py | ||
| lap/_lapjvs_batch_wp.py | ||
| lap/_lapjvs_wp.py | ||
| lap/_lapjvx_batch_wp.py | ||
| lap/_lapjvx_wp.py | ||
| lap/_lapmod_wp.py | ||
| lapx.egg-info/PKG-INFO | ||
@@ -15,0 +18,0 @@ lapx.egg-info/SOURCES.txt |
+1
-0
@@ -8,2 +8,3 @@ include *.md *.in LICENSE NOTICE | ||
| prune .vscode | ||
| prune benchmarks | ||
| prune tests |
+41
-48
| Metadata-Version: 2.4 | ||
| Name: lapx | ||
| Version: 0.8.1 | ||
| Version: 0.9.0 | ||
| Summary: Linear assignment problem solvers, including single and batch solvers. | ||
@@ -52,9 +52,10 @@ Home-page: https://github.com/rathaROG/lapx | ||
| <details><summary>🆕 What's new</summary><br> | ||
| <details><summary>🆕 What's new</summary><hr> | ||
| - 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. | ||
| - 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. | ||
| - 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. | ||
| - 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. | ||
| - Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). | ||
| <sup>- 2025/10/31: [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) delivered a major stability and performance upgrade across most solvers. 🚀 </sup><br> | ||
| <sup>- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. </sup><br> | ||
| <sup>- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. </sup><br> | ||
| <sup>- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. </sup><br> | ||
| <sup>- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. </sup><br> | ||
| <sup>- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). </sup><br> | ||
@@ -65,17 +66,23 @@ </details> | ||
| <div align="center"> | ||
| [](https://github.com/rathaROG/lapx/releases) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml) | ||
| [](https://pypi.org/project/lapx/#files) | ||
| [](https://pypi.org/project/lapx/) | ||
| # Linear Assignment Problem Solvers | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml) | ||
| [`lapx`](https://github.com/rathaROG/lapx) supports all input cost types — Single ✓ Batch ✓ Square ✓ Rectangular ✓ . | ||
| # Linear Assignment Problem Solvers · 𝕏 | ||
| `lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| **Single ✓ Batch ✓ Square ✓ Rectangular ✓** | ||
| </div> | ||
| [`lapx`](https://github.com/rathaROG/lapx) was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| <details><summary>Click to read more ...</summary><br> | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³. | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation ³ provided by A. Volgenant. | ||
@@ -93,3 +100,3 @@ <sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br> | ||
| [](https://pypi.org/project/lapx/) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://pepy.tech/project/lapx) | ||
@@ -102,15 +109,4 @@ [](https://pepy.tech/project/lapx) | ||
| <details><summary>🛞 Pre-built wheel support</summary> | ||
| *The pre-built wheels cover most platforms and architectures, see [details](https://pypi.org/project/lapx/#files).* | ||
| | Python | **Windows** ✅ | **Linux** ✅ | **macOS** ✅ | | ||
| |:---:|:---:|:---:|:---:| | ||
| | 3.7 | AMD64 | x86_64/aarch64 | x86_64 | | ||
| | 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 | | ||
| | 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 | | ||
| <sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br> | ||
| <sup>² ⚠️ Windows ARM64 is experimental. </sup><br> | ||
| </details> | ||
| <details><summary>🛠️ Other installation options</summary> | ||
@@ -139,2 +135,4 @@ | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml) | ||
| ### 🅰️ Single-matrix Solvers 📄 | ||
@@ -196,3 +194,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -226,3 +225,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -243,3 +243,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -268,3 +269,3 @@ | ||
| For see the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details. | ||
| See the [`lap/_lapmod_wp.py`](https://github.com/rathaROG/lapx/blob/main/lap/_lapmod_wp.py) for details. | ||
@@ -302,7 +303,4 @@ ```python | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -323,4 +321,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -342,7 +339,4 @@ | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -365,4 +359,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -378,3 +371,3 @@ | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) |
+40
-47
@@ -1,8 +0,9 @@ | ||
| <details><summary>🆕 What's new</summary><br> | ||
| <details><summary>🆕 What's new</summary><hr> | ||
| - 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. | ||
| - 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. | ||
| - 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. | ||
| - 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. | ||
| - Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). | ||
| <sup>- 2025/10/31: [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) delivered a major stability and performance upgrade across most solvers. 🚀 </sup><br> | ||
| <sup>- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. </sup><br> | ||
| <sup>- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. </sup><br> | ||
| <sup>- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. </sup><br> | ||
| <sup>- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. </sup><br> | ||
| <sup>- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). </sup><br> | ||
@@ -13,17 +14,23 @@ </details> | ||
| <div align="center"> | ||
| [](https://github.com/rathaROG/lapx/releases) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml) | ||
| [](https://pypi.org/project/lapx/#files) | ||
| [](https://pypi.org/project/lapx/) | ||
| # Linear Assignment Problem Solvers | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml) | ||
| [`lapx`](https://github.com/rathaROG/lapx) supports all input cost types — Single ✓ Batch ✓ Square ✓ Rectangular ✓. | ||
| # Linear Assignment Problem Solvers · 𝕏 | ||
| `lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| **Single ✓ Batch ✓ Square ✓ Rectangular ✓** | ||
| </div> | ||
| [`lapx`](https://github.com/rathaROG/lapx) was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| <details><summary>Click to read more ...</summary><br> | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³. | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation ³ provided by A. Volgenant. | ||
@@ -41,3 +48,3 @@ <sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br> | ||
| [](https://pypi.org/project/lapx/) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://pepy.tech/project/lapx) | ||
@@ -50,15 +57,4 @@ [](https://pepy.tech/project/lapx) | ||
| <details><summary>🛞 Pre-built wheel support</summary> | ||
| *The pre-built wheels cover most platforms and architectures, see [details](https://pypi.org/project/lapx/#files).* | ||
| | Python | **Windows** ✅ | **Linux** ✅ | **macOS** ✅ | | ||
| |:---:|:---:|:---:|:---:| | ||
| | 3.7 | AMD64 | x86_64/aarch64 | x86_64 | | ||
| | 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 | | ||
| | 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 | | ||
| <sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br> | ||
| <sup>² ⚠️ Windows ARM64 is experimental. </sup><br> | ||
| </details> | ||
| <details><summary>🛠️ Other installation options</summary> | ||
@@ -87,2 +83,4 @@ | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml) | ||
| ### 🅰️ Single-matrix Solvers 📄 | ||
@@ -144,3 +142,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -174,3 +173,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -191,3 +191,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -216,3 +217,3 @@ | ||
| See the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details. | ||
| See the [`lap/_lapmod_wp.py`](https://github.com/rathaROG/lapx/blob/main/lap/_lapmod_wp.py) for details. | ||
@@ -250,7 +251,4 @@ ```python | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -271,4 +269,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -290,7 +287,4 @@ | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -313,4 +307,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -326,3 +319,3 @@ | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) |
+154
-61
@@ -46,72 +46,130 @@ # Tomas Kazmar, 2012-2017, BSD 2-clause license, see LICENSE. | ||
| double cost_limit=np.inf, char return_cost=True): | ||
| """Solve linear assignment problem using Jonker-Volgenant algorithm. | ||
| """ | ||
| Solve the Linear Assignment Problem using the Jonker-Volgenant (JV) algorithm. | ||
| Orientation is normalized internally for performance: the native kernel | ||
| always sees rows <= cols by transposing when N_rows > N_cols, and results | ||
| are mapped back to the original (row, col) orientation before returning. | ||
| Parameters | ||
| ---------- | ||
| cost: (N,M) ndarray | ||
| Cost matrix. Entry `cost[i, j]` is the cost of assigning row `i` to | ||
| column `j`. | ||
| extend_cost: bool, optional | ||
| Whether or not extend a non-square matrix. Default: False. | ||
| cost_limit: double, optional | ||
| An upper limit for a cost of a single assignment. Default: `np.inf`. | ||
| return_cost: bool, optional | ||
| Whether or not to return the assignment cost. | ||
| cost : (N, M) ndarray | ||
| Cost matrix. Entry cost[i, j] is the cost of assigning row i to column j. | ||
| Any float dtype is accepted; a contiguous float64 working buffer is used only if needed. | ||
| extend_cost : bool, optional (default: False) | ||
| Whether to permit non-square inputs via zero-padding to a square matrix. | ||
| See the unified augmentation policy below. | ||
| cost_limit : float, optional (default: np.inf) | ||
| If finite, augment to an (N+M) x (N+M) matrix with sentinel costs | ||
| cost_limit/2 and a 0 block in the bottom-right. This models per-edge | ||
| reject costs and allows rectangular inputs even when extend_cost=False. | ||
| return_cost : bool, optional (default: True) | ||
| Whether to return the total assignment cost as the first return value. | ||
| Returns | ||
| ------- | ||
| opt: double | ||
| Assignment cost. Not returned if `return_cost is False`. | ||
| x: (N,) ndarray | ||
| Assignment. `x[i]` specifies the column to which row `i` is assigned. | ||
| y: (N,) ndarray | ||
| Assignment. `y[j]` specifies the row to which column `j` is assigned. | ||
| opt : float | ||
| Total assignment cost (only if return_cost=True). The total is computed | ||
| from the ORIGINAL input array shape (N, M), not the padded/augmented one. | ||
| x : (N,) ndarray of int32 | ||
| x[i] = assigned column index for row i, or -1 if unassigned. | ||
| y : (M,) ndarray of int32 | ||
| y[j] = assigned row index for column j, or -1 if unassigned. | ||
| Unified augmentation policy | ||
| --------------------------- | ||
| - If cost_limit < inf: always augment to (N+M) to model per-edge rejects. | ||
| Rectangular inputs are allowed regardless of extend_cost. | ||
| - Else if (N != M) or extend_cost=True: zero-pad to a square of size max(N, M). | ||
| Rectangular inputs are allowed when extend_cost=True. | ||
| - Else (square, un-augmented): run on the given square matrix. | ||
| Notes | ||
| ----- | ||
| For non-square matrices (with `extend_cost is True`) or `cost_limit` set | ||
| low enough, there will be unmatched rows, columns in the solution `x`, `y`. | ||
| All such entries are set to -1. | ||
| - Single contiguous working buffer: we reuse the input when it's already | ||
| float64 C-contiguous and no transpose is needed; otherwise we materialize | ||
| exactly one contiguous float64 working array (or its transpose). | ||
| - For zero-sized dimensions, the solver returns 0.0 (if requested) and | ||
| all -1 mappings with appropriate lengths. | ||
| """ | ||
| if cost.ndim != 2: | ||
| raise ValueError('2-dimensional array expected') | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = \ | ||
| np.ascontiguousarray(cost, dtype=np.double) | ||
| # Original input for final total computation (no copy unless necessary for transpose below) | ||
| A = np.asarray(cost) | ||
| cdef Py_ssize_t n_rows0 = A.shape[0] | ||
| cdef Py_ssize_t n_cols0 = A.shape[1] | ||
| # Fast exits for empty dimensions | ||
| if n_rows0 == 0 or n_cols0 == 0: | ||
| if return_cost: | ||
| return 0.0, np.full((n_rows0,), -1, dtype=np.int32), np.full((n_cols0,), -1, dtype=np.int32) | ||
| else: | ||
| return np.full((n_rows0,), -1, dtype=np.int32), np.full((n_cols0,), -1, dtype=np.int32) | ||
| # Normalize orientation: kernel sees rows <= cols | ||
| cdef bint transposed = False | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] B | ||
| if n_rows0 > n_cols0: | ||
| B = np.ascontiguousarray(A.T, dtype=np.double) # single working buffer (transposed) | ||
| transposed = True | ||
| else: | ||
| if A.dtype == np.float64 and A.flags['C_CONTIGUOUS']: | ||
| # reuse input buffer directly | ||
| B = A | ||
| else: | ||
| B = np.ascontiguousarray(A, dtype=np.double) # single working buffer | ||
| cdef Py_ssize_t R = B.shape[0] # working rows (<= cols) | ||
| cdef Py_ssize_t C = B.shape[1] # working cols | ||
| # Permit rectangular when cost_limit < inf (augment) or extend_cost=True (zero-pad); otherwise require square | ||
| if R != C and (not extend_cost) and cost_limit == np.inf: | ||
| raise ValueError( | ||
| 'Square cost array expected. If cost is intentionally ' | ||
| 'non-square, pass extend_cost=True or set a finite cost_limit.' | ||
| ) | ||
| cdef uint_t N | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = B | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c_extended | ||
| cdef uint_t n_rows = cost_c.shape[0] | ||
| cdef uint_t n_cols = cost_c.shape[1] | ||
| cdef uint_t n = 0 | ||
| if n_rows == n_cols: | ||
| n = n_rows | ||
| else: | ||
| if not extend_cost: | ||
| raise ValueError( | ||
| 'Square cost array expected. If cost is intentionally ' | ||
| 'non-square, pass extend_cost=True.') | ||
| if cost_limit < np.inf: | ||
| n = n_rows + n_cols | ||
| cost_c_extended = np.empty((n, n), dtype=np.double) | ||
| cost_c_extended[:] = cost_limit / 2. | ||
| cost_c_extended[n_rows:, n_cols:] = 0 | ||
| cost_c_extended[:n_rows, :n_cols] = cost_c | ||
| # Augment to (R+C)x(R+C) with sentinel edges | ||
| N = <uint_t>(R + C) | ||
| cost_c_extended = np.empty((R + C, R + C), dtype=np.double) | ||
| cost_c_extended[:] = cost_limit / 2.0 | ||
| cost_c_extended[R:, C:] = 0.0 | ||
| cost_c_extended[:R, :C] = cost_c | ||
| cost_c = cost_c_extended | ||
| elif extend_cost: | ||
| n = max(n_rows, n_cols) | ||
| cost_c_extended = np.zeros((n, n), dtype=np.double) | ||
| cost_c_extended[:n_rows, :n_cols] = cost_c | ||
| cost_c = cost_c_extended | ||
| elif R != C or extend_cost: | ||
| # Zero-pad to square max(R, C); if already square and extend_cost=True, keep as-is | ||
| N = <uint_t>max(R, C) | ||
| if R != C: | ||
| cost_c_extended = np.zeros((N, N), dtype=np.double) | ||
| cost_c_extended[:R, :C] = cost_c | ||
| cost_c = cost_c_extended | ||
| else: | ||
| N = <uint_t>R | ||
| else: | ||
| # Square, un-augmented | ||
| N = <uint_t>R | ||
| cdef double **cost_ptr | ||
| cost_ptr = <double **> malloc(n * sizeof(double *)) | ||
| # Build row-pointer view for kernel | ||
| cdef double **cost_ptr = <double **> malloc(N * sizeof(double *)) | ||
| if cost_ptr == NULL: | ||
| raise MemoryError('Out of memory.') | ||
| cdef int i | ||
| for i in range(n): | ||
| for i in range(N): | ||
| cost_ptr[i] = &cost_c[i, 0] | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = \ | ||
| np.empty((n,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = \ | ||
| np.empty((n,), dtype=np.int32) | ||
| # Outputs for kernel space (size N) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((N,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((N,), dtype=np.int32) | ||
| cdef int ret = lapjv_internal(n, cost_ptr, &x_c[0], &y_c[0]) | ||
| cdef int ret | ||
| with nogil: | ||
| ret = lapjv_internal(N, cost_ptr, &x_c[0], &y_c[0]) | ||
| free(cost_ptr) | ||
| if ret != 0: | ||
@@ -122,17 +180,52 @@ if ret == -1: | ||
| cdef double opt = np.nan | ||
| if cost_limit < np.inf or extend_cost: | ||
| x_c[x_c >= n_cols] = -1 | ||
| y_c[y_c >= n_rows] = -1 | ||
| x_c = x_c[:n_rows] | ||
| y_c = y_c[:n_cols] | ||
| if return_cost: | ||
| opt = cost_c[np.nonzero(x_c != -1)[0], x_c[x_c != -1]].sum() | ||
| elif return_cost: | ||
| opt = cost_c[np.arange(n_rows), x_c].sum() | ||
| # Trim to working rectangle (B space) and clean artificial matches | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_trim | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_trim | ||
| if cost_limit < np.inf or (R != C or extend_cost): | ||
| x_c[x_c >= C] = -1 | ||
| y_c[y_c >= R] = -1 | ||
| x_trim = x_c[:R] | ||
| y_trim = y_c[:C] | ||
| else: | ||
| x_trim = x_c[:R] | ||
| y_trim = y_c[:C] | ||
| # Map to ORIGINAL orientation (A space) as vectors x_out (N_rows0), y_out (N_cols0) | ||
| x_out = np.full((n_rows0,), -1, dtype=np.int32) | ||
| y_out = np.full((n_cols0,), -1, dtype=np.int32) | ||
| if not transposed: | ||
| mask = (x_trim >= 0) | ||
| if np.any(mask): | ||
| rows = np.nonzero(mask)[0] | ||
| cols = x_trim[mask] | ||
| x_out[rows] = cols | ||
| y_out[cols] = rows | ||
| else: | ||
| # B rows are A columns; B cols are A rows | ||
| mask = (x_trim >= 0) | ||
| if np.any(mask): | ||
| rows_b = np.nonzero(mask)[0] # indices in B rows -> A columns | ||
| cols_b = x_trim[mask] # indices in B cols -> A rows | ||
| row_A = cols_b | ||
| col_A = rows_b | ||
| x_out[row_A] = col_A | ||
| y_out[col_A] = row_A | ||
| # Total from ORIGINAL A | ||
| cdef double opt = 0.0 | ||
| if return_cost: | ||
| return opt, x_c, y_c | ||
| mcost = (x_out >= 0) | ||
| if np.any(mcost): | ||
| rr = np.nonzero(mcost)[0] | ||
| cc = x_out[mcost] | ||
| opt = float(A[rr, cc].sum()) | ||
| else: | ||
| opt = 0.0 | ||
| if return_cost: | ||
| return opt, x_out, y_out | ||
| else: | ||
| return x_c, y_c | ||
| return x_out, y_out | ||
@@ -139,0 +232,0 @@ |
+105
-68
@@ -40,63 +40,92 @@ # _lapjvx.pyx | Wrote on 2025/10/16 by rathaROG | ||
| Parameters | ||
| ---------- | ||
| cost: (N, M) ndarray | ||
| Cost matrix. Entry cost[i, j] is the cost of assigning row i to column j. | ||
| extend_cost: bool, optional | ||
| Whether or not to extend a non-square matrix. Default: False. | ||
| cost_limit: double, optional | ||
| An upper limit for a cost of a single assignment. Default: np.inf. | ||
| return_cost: bool, optional | ||
| Whether or not to return the assignment cost. | ||
| Orientation is normalized internally (kernel sees rows <= cols) and results | ||
| are mapped back to the original orientation. | ||
| Unified augmentation policy | ||
| --------------------------- | ||
| - If cost_limit < inf: augment to (N+M) with cost_limit/2 sentinels (rectangular allowed). | ||
| - Elif (N != M) or extend_cost: zero-pad to square max(N, M) (rectangular allowed when extend_cost=True). | ||
| - Else (square, un-augmented): run on the given square. | ||
| Returns | ||
| ------- | ||
| opt: double | ||
| Assignment cost. Not returned if return_cost is False. | ||
| row_indices: (K,) ndarray | ||
| Indices of assigned rows. | ||
| col_indices: (K,) ndarray | ||
| Indices of assigned columns. | ||
| opt : float | ||
| Total cost (if return_cost=True), computed on the ORIGINAL input (not padded). | ||
| row_indices : (K,) ndarray (np.where -> int64) | ||
| col_indices : (K,) ndarray (sliced from x_c -> int32) | ||
| """ | ||
| if cost.ndim != 2: | ||
| raise ValueError('2-dimensional array expected') | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = \ | ||
| np.ascontiguousarray(cost, dtype=np.double) | ||
| # Original input for final total (no copy unless needed for transpose) | ||
| A = np.asarray(cost) | ||
| cdef Py_ssize_t n_rows0 = A.shape[0] | ||
| cdef Py_ssize_t n_cols0 = A.shape[1] | ||
| # Fast exits for empty dims | ||
| if n_rows0 == 0 or n_cols0 == 0: | ||
| if return_cost: | ||
| return 0.0, np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64) | ||
| else: | ||
| return np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64) | ||
| # Normalize orientation: kernel sees rows <= cols | ||
| cdef bint transposed = False | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] B | ||
| if n_rows0 > n_cols0: | ||
| B = np.ascontiguousarray(A.T, dtype=np.double) # single working buffer (transposed) | ||
| transposed = True | ||
| else: | ||
| if A.dtype == np.float64 and A.flags['C_CONTIGUOUS']: | ||
| B = A # reuse input buffer | ||
| else: | ||
| B = np.ascontiguousarray(A, dtype=np.double) # single working buffer | ||
| cdef Py_ssize_t R = B.shape[0] | ||
| cdef Py_ssize_t C = B.shape[1] | ||
| # Gate: rectangular error only if extend_cost=False and cost_limit==inf | ||
| if R != C and (not extend_cost) and cost_limit == np.inf: | ||
| raise ValueError( | ||
| 'Square cost array expected. If cost is intentionally ' | ||
| 'non-square, pass extend_cost=True.' | ||
| ) | ||
| cdef uint_t N | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = B | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c_extended | ||
| cdef uint_t n_rows = cost_c.shape[0] | ||
| cdef uint_t n_cols = cost_c.shape[1] | ||
| cdef uint_t n = 0 | ||
| if n_rows == n_cols: | ||
| n = n_rows | ||
| else: | ||
| if not extend_cost: | ||
| raise ValueError( | ||
| 'Square cost array expected. If cost is intentionally ' | ||
| 'non-square, pass extend_cost=True.') | ||
| if cost_limit < np.inf: | ||
| n = n_rows + n_cols | ||
| cost_c_extended = np.empty((n, n), dtype=np.double) | ||
| cost_c_extended[:] = cost_limit / 2. | ||
| cost_c_extended[n_rows:, n_cols:] = 0 | ||
| cost_c_extended[:n_rows, :n_cols] = cost_c | ||
| N = <uint_t>(R + C) | ||
| cost_c_extended = np.empty((R + C, R + C), dtype=np.double) | ||
| cost_c_extended[:] = cost_limit / 2.0 | ||
| cost_c_extended[R:, C:] = 0.0 | ||
| cost_c_extended[:R, :C] = cost_c | ||
| cost_c = cost_c_extended | ||
| elif extend_cost: | ||
| n = max(n_rows, n_cols) | ||
| cost_c_extended = np.zeros((n, n), dtype=np.double) | ||
| cost_c_extended[:n_rows, :n_cols] = cost_c | ||
| cost_c = cost_c_extended | ||
| elif R != C or extend_cost: | ||
| N = <uint_t>max(R, C) | ||
| if R != C: | ||
| cost_c_extended = np.zeros((N, N), dtype=np.double) | ||
| cost_c_extended[:R, :C] = cost_c | ||
| cost_c = cost_c_extended | ||
| else: | ||
| N = <uint_t>R | ||
| else: | ||
| N = <uint_t>R | ||
| cdef double **cost_ptr | ||
| cost_ptr = <double **> malloc(n * sizeof(double *)) | ||
| # Build row-pointer view | ||
| cdef double **cost_ptr = <double **> malloc(N * sizeof(double *)) | ||
| if cost_ptr == NULL: | ||
| raise MemoryError('Out of memory when allocating cost_ptr') | ||
| cdef int i | ||
| for i in range(n): | ||
| for i in range(N): | ||
| cost_ptr[i] = &cost_c[i, 0] | ||
| # Allocate x/y, build cost_ptr as before | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((n,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((n,), dtype=np.int32) | ||
| # Allocate x/y | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((N,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((N,), dtype=np.int32) | ||
| cdef int ret | ||
| with nogil: | ||
| ret = lapjv_internal(<uint_t> n, cost_ptr, &x_c[0], &y_c[0]) | ||
| ret = lapjv_internal(<uint_t> N, cost_ptr, &x_c[0], &y_c[0]) | ||
@@ -109,19 +138,34 @@ free(cost_ptr) | ||
| cdef double opt = np.nan | ||
| if cost_limit < np.inf or extend_cost: | ||
| x_c[x_c >= n_cols] = -1 | ||
| y_c[y_c >= n_rows] = -1 | ||
| x_c = x_c[:n_rows] | ||
| y_c = y_c[:n_cols] | ||
| if return_cost: | ||
| opt = cost_c[np.nonzero(x_c != -1)[0], x_c[x_c != -1]].sum() | ||
| elif return_cost: | ||
| opt = cost_c[np.arange(n_rows), x_c].sum() | ||
| # Trim to working rectangle (B-space) and clean artificial matches | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_trim | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_trim | ||
| # Construct row_indices, col_indices - only for assigned rows | ||
| assigned_mask = x_c >= 0 | ||
| row_indices = np.where(assigned_mask)[0] | ||
| col_indices = x_c[assigned_mask] | ||
| if cost_limit < np.inf or (R != C or extend_cost): | ||
| x_c[x_c >= C] = -1 | ||
| y_c[y_c >= R] = -1 | ||
| x_trim = x_c[:R] | ||
| y_trim = y_c[:C] | ||
| else: | ||
| x_trim = x_c[:R] | ||
| y_trim = y_c[:C] | ||
| # Build rows/cols in ORIGINAL orientation (A-space) | ||
| rows_b = np.nonzero(x_trim >= 0)[0].astype(np.int64, copy=False) | ||
| cols_b = x_trim[x_trim >= 0] # keep int32 dtype | ||
| if transposed: | ||
| row_indices = cols_b | ||
| col_indices = rows_b | ||
| else: | ||
| row_indices = rows_b | ||
| col_indices = cols_b | ||
| cdef double opt = 0.0 | ||
| if return_cost: | ||
| if row_indices.size: | ||
| opt = float(A[row_indices, col_indices].sum()) | ||
| else: | ||
| opt = 0.0 | ||
| if return_cost: | ||
| return opt, row_indices, col_indices | ||
@@ -140,12 +184,5 @@ else: | ||
| """ | ||
| Like lapjvx, but returns assignment pairs as a (K,2) np.ndarray of (row, col). | ||
| Returns | ||
| ------- | ||
| opt : double | ||
| Assignment cost. Not returned if return_cost is False. | ||
| assignments : (K, 2) ndarray | ||
| Each row is (row_index, col_index). | ||
| Like lapjvx, but returns assignment pairs as a (K,2) ndarray of (row, col). | ||
| Uses int32 pairs to match legacy behavior. | ||
| """ | ||
| # We call lapjvx to get the indices | ||
| if return_cost: | ||
@@ -152,0 +189,0 @@ opt, row_indices, col_indices = lapjvx(cost, extend_cost=extend_cost, |
@@ -7,2 +7,3 @@ #include <pybind11/pybind11.h> | ||
| #include <limits> | ||
| #include <type_traits> | ||
@@ -12,7 +13,11 @@ #include "dense.hpp" | ||
| /** | ||
| Updated on 2025/10/16 by rathaROG | ||
| Updated by rathaROG: | ||
| The return value is changed to include total_cost as the first element of the tuple. | ||
| This is to avoid recalculating the total cost in Python, which can be inefficient for | ||
| large matrices. | ||
| 2025/10/30: Improve speed almost 2x for rectangular matrices by using ZERO padding | ||
| for dummy rows/columns instead of LARGE_COST padding. This avoids filling the entire | ||
| padded area with LARGE_COST and speeds up rectangular cases significantly. | ||
| 2025/10/16: The return value is changed to include total_cost as the first element of | ||
| the tuple. This is to avoid recalculating the total cost in Python, which can be | ||
| inefficient for large matrices. | ||
| */ | ||
@@ -42,10 +47,10 @@ | ||
| bool any_finite = false; | ||
| T max_abs_cost = 0; | ||
| for(int i = 0; i < nrows*ncols; ++i) { | ||
| if (std::isfinite((double)data[i])) { | ||
| double max_abs_cost_d = 0.0; | ||
| for (int i = 0; i < nrows * ncols; ++i) { | ||
| // We cast to double for the finiteness check. For integer T this is always finite. | ||
| double dv = static_cast<double>(data[i]); | ||
| if (std::isfinite(dv)) { | ||
| any_finite = true; | ||
| // Careful: Note that std::abs() is not a template. | ||
| // https://en.cppreference.com/w/cpp/numeric/math/abs | ||
| // https://en.cppreference.com/w/cpp/numeric/math/fabs | ||
| max_abs_cost = std::max<T>(max_abs_cost, std::abs(data[i])); | ||
| // Use fabs on double to avoid template pitfalls and integer overflow on abs(INT_MIN). | ||
| max_abs_cost_d = std::max(max_abs_cost_d, std::fabs(dv)); | ||
| } | ||
@@ -63,15 +68,33 @@ } | ||
| const int n = std::max<int>(nrows, ncols); | ||
| const T LARGE_COST = 2 * r * max_abs_cost + 1; | ||
| std::vector<std::vector<T>> costs(n, std::vector<T>(n, LARGE_COST)); | ||
| for (int i = 0; i < nrows; i++) | ||
| { | ||
| T *cptr = data + i*ncols; | ||
| for (int j = 0; j < ncols; j++) | ||
| { | ||
| // Compute a LARGE_COST sentinel for forbidden entries within the original rectangle. | ||
| // Use double to compute and clamp if T is integral to avoid overflow. | ||
| double large_cost_d = 2.0 * static_cast<double>(r) * max_abs_cost_d + 1.0; | ||
| T LARGE_COST; | ||
| if constexpr (std::is_floating_point<T>::value) { | ||
| LARGE_COST = static_cast<T>(large_cost_d); | ||
| } else { | ||
| // Clamp to a safe large value for integer types. | ||
| using Lim = std::numeric_limits<T>; | ||
| double cap = std::min<double>(static_cast<double>(Lim::max()) / 2.0, large_cost_d); | ||
| LARGE_COST = static_cast<T>(cap); | ||
| } | ||
| // Build square cost matrix with ZERO padding for dummy rows/columns (fast for rectangular). | ||
| // Only set LARGE_COST for forbidden entries inside the original MxN region. | ||
| std::vector<std::vector<T>> costs(n, std::vector<T>(n, T(0))); | ||
| for (int i = 0; i < nrows; ++i) { | ||
| T *cptr = data + i * ncols; | ||
| for (int j = 0; j < ncols; ++j) { | ||
| const T c = cptr[j]; | ||
| if (std::isfinite((double)c)) | ||
| costs[i][j] = c; | ||
| // For floats: non-finite => forbidden. For integers: always finite => allowed. | ||
| bool finite = std::isfinite(static_cast<double>(c)); | ||
| costs[i][j] = finite ? c : LARGE_COST; | ||
| } | ||
| } | ||
| // Note: | ||
| // - If nrows < ncols, rows [nrows..n-1] remain zero => dummy rows. | ||
| // - If ncols < nrows, cols [ncols..n-1] remain zero => dummy columns. | ||
| // This avoids filling the entire padded area with LARGE_COST and speeds up rectangular cases. | ||
@@ -82,12 +105,11 @@ std::vector<int> Lmate, Rmate; | ||
| std::vector<int> rowids, colids; | ||
| T total_cost = 0; // NEW: accumulate total assignment cost | ||
| T total_cost = T(0); | ||
| for (int i = 0; i < nrows; i++) | ||
| { | ||
| // Collect only real (row, col) matches. Exclude dummy columns (j >= ncols) and forbidden. | ||
| for (int i = 0; i < nrows; ++i) { | ||
| int mate = Lmate[i]; | ||
| if (Lmate[i] < ncols && costs[i][mate] != LARGE_COST) | ||
| { | ||
| if (mate >= 0 && mate < ncols && costs[i][mate] != LARGE_COST) { | ||
| rowids.push_back(i); | ||
| colids.push_back(mate); | ||
| total_cost += costs[i][mate]; // Sum up the cost for each assigned pair | ||
| total_cost = static_cast<T>(static_cast<double>(total_cost) + static_cast<double>(costs[i][mate])); | ||
| } | ||
@@ -97,6 +119,8 @@ } | ||
| if (return_cost) | ||
| return py::make_tuple(total_cost, py::array(rowids.size(), rowids.data()), py::array(colids.size(), colids.data())); | ||
| return py::make_tuple(total_cost, | ||
| py::array(rowids.size(), rowids.data()), | ||
| py::array(colids.size(), colids.data())); | ||
| else | ||
| return py::make_tuple(py::array(rowids.size(), rowids.data()), py::array(colids.size(), colids.data())); | ||
| return py::make_tuple(py::array(rowids.size(), rowids.data()), | ||
| py::array(colids.size(), colids.data())); | ||
| } |
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import os | ||
| import numpy as np | ||
| from typing import List, Tuple, Union | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from .lapjvs import lapjvs as _lapjvs_single | ||
| from .lapjvs import lapjvsa as _lapjvsa_single | ||
| def _normalize_threads(n_threads: int) -> int: | ||
| if n_threads is None or n_threads == 0: | ||
| cpu = os.cpu_count() or 1 | ||
| return max(1, int(cpu)) | ||
| return max(1, int(n_threads)) | ||
def _solve_one_jvs(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    jvx_like: bool,
    return_cost: bool,
):
    """Solve one 2D cost matrix via the single-instance JVS wrapper.

    The wrapper releases the GIL internally, so these calls can safely be
    fanned out across a thread pool.
    """
    return _lapjvs_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        jvx_like=jvx_like,
        prefer_float32=prefer_float32,
    )
def _solve_one_jvsa(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    return_cost: bool,
):
    """Solve one 2D cost matrix via the single-instance JVS pairs wrapper."""
    return _lapjvsa_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        prefer_float32=prefer_float32,
    )
def lapjvs_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """
    Stable batched JVS built on the single-instance solver with a thread pool.

    Parameters
    ----------
    costs : array-like, shape (B, N, M)
        Batch of cost matrices (float32/float64).
    extend_cost : bool
        Pad rectangular inputs to square.
    return_cost : bool
        If True, return per-matrix totals (B,) first.
    n_threads : int
        0 -> os.cpu_count(), else exact number of threads.
    prefer_float32 : bool
        Forwarded to the single-instance kernel.

    Returns
    -------
    (totals, rows_list, cols_list) if return_cost else (rows_list, cols_list).
    """
    batch = np.asarray(costs)
    if batch.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    n_items = batch.shape[0]
    n_workers = _normalize_threads(n_threads)
    totals = np.empty((n_items,), dtype=np.float64) if return_cost else None
    rows_out: List[np.ndarray] = [None] * n_items  # type: ignore
    cols_out: List[np.ndarray] = [None] * n_items  # type: ignore

    def _task(bi: int):
        # The single-instance wrapper releases the GIL internally.
        res = _solve_one_jvs(
            batch[bi], extend_cost=extend_cost, prefer_float32=prefer_float32,
            jvx_like=True, return_cost=return_cost,
        )
        if return_cost:
            total, rows, cols = res
            return bi, total, rows, cols
        rows, cols = res
        return bi, None, rows, cols

    def _store(result) -> None:
        bi, total, rows, cols = result
        if return_cost:
            totals[bi] = float(total)  # type: ignore
        rows_out[bi] = np.asarray(rows, dtype=np.int64)
        cols_out[bi] = np.asarray(cols, dtype=np.int64)

    if n_workers == 1 or n_items == 1:
        # Serial path (still GIL-free inside the solver).
        for bi in range(n_items):
            _store(_task(bi))
    else:
        # Parallel path: a thread pool is fine because the solver drops the GIL.
        with ThreadPoolExecutor(max_workers=n_workers) as pool:
            pending = [pool.submit(_task, bi) for bi in range(n_items)]
            for fut in as_completed(pending):
                _store(fut.result())

    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_out, cols_out  # type: ignore
    return rows_out, cols_out
def lapjvsa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """
    Stable batched JVS (pairs API) built on the single-instance wrapper with a thread pool.

    Parameters
    ----------
    costs : array-like, shape (B, N, M)
        Batch of cost matrices.
    extend_cost : bool
        Pad rectangular inputs to square.
    return_cost : bool
        If True, return per-matrix totals (B,) first.
    n_threads : int
        0 -> os.cpu_count(), else exact number of threads.
    prefer_float32 : bool
        Forwarded to the single-instance kernel.

    Returns
    -------
    if return_cost:
        (totals: (B,), pairs_list: List[(K_b, 2)])
    else:
        pairs_list: List[(K_b, 2)]
        # NOTE: the previous docstring advertised a 1-tuple ``(pairs_list,)``,
        # but the implementation returns the bare list; documented accordingly.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one matrix; the single-instance wrapper releases the GIL.
        a2d = A[bi]
        if return_cost:
            total, pairs = _solve_one_jvsa(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=True
            )
            return bi, total, pairs
        pairs = _solve_one_jvsa(
            a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=False
        )
        return bi, None, pairs

    if threads == 1 or B == 1:
        # Serial path avoids pool overhead for trivial batches.
        for bi in range(B):
            idx, t, P = work(bi)
            if return_cost:
                totals[idx] = float(t)  # type: ignore
            pairs_list[idx] = np.asarray(P, dtype=np.int64)
    else:
        with ThreadPoolExecutor(max_workers=threads) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, P = fut.result()
                if return_cost:
                    totals[idx] = float(t)  # type: ignore
                pairs_list[idx] = np.asarray(P, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
-228
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Optional, Tuple, Union | ||
| from ._lapjvs import lapjvs_native as _lapjvs_native | ||
| from ._lapjvs import lapjvs_float32 as _lapjvs_float32 | ||
| from ._lapjvs import lapjvsa_native as _lapjvsa_native | ||
| from ._lapjvs import lapjvsa_float32 as _lapjvsa_float32 | ||
def lapjvs(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    jvx_like: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray]
]:
    """
    Solve the Linear Assignment Problem using the 'lapjvs' algorithm.

    - Accepts rectangular cost matrices (pad to square internally).
    - Returns lapjv-like (x, y) or jvx-like (rows, cols).
    - Optional prefer_float32 to reduce memory bandwidth.

    Parameters
    ----------
    cost : np.ndarray
        Cost matrix of shape (n, m). dtype should be a floating type.
    extend_cost : Optional[bool]
        If True, treat rectangular inputs by extending (zero-padding) to square.
        If False, require square input.
        If None (default), auto-detect: extend if n != m.
    return_cost : bool
        If True (default), return total cost as the first element of the return tuple.
    jvx_like : bool
        If True (default), return lapjvx/SciPy-style index arrays:
        - return_cost=True  -> (total_cost, row_indices, col_indices)
        - return_cost=False -> (row_indices, col_indices)
        If False, return lapjv-style mapping vectors:
        - return_cost=True  -> (total_cost, x, y)
        - return_cost=False -> (x, y)
        (Fixed: the docstring previously claimed False was the default, which
        contradicted the signature default of True.)
    prefer_float32 : bool
        If True (default), run the float32 kernel. The float64 kernel is used
        only when prefer_float32 is explicitly False AND the input is float64.

    Notes
    -----
    - The solver kernel may run in float32 (when prefer_float32=True) or native float64,
      but the returned total cost is always recomputed from the ORIGINAL input array
      to preserve previous numeric behavior and parity with lapjv/lapjvx.
    - For rectangular inputs, zero-padding exactly models the rectangular LAP.
    """
    # Keep the original array to compute the final cost from it (preserves
    # previous numeric behavior regardless of the kernel's working dtype).
    a = np.asarray(cost)
    if a.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n, m = a.shape
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    # Choose backend and working dtype for the solver only (ensure contiguity
    # to avoid hidden copies).
    use_float32_kernel = not ((prefer_float32 is False) and (a.dtype == np.float64))
    if use_float32_kernel:
        # Run float32 kernel (casting as needed).
        _kernel = _lapjvs_float32
        work = np.ascontiguousarray(a, dtype=np.float32)
    else:
        # Run native kernel on float64 inputs.
        _kernel = _lapjvs_native
        work = np.ascontiguousarray(a, dtype=np.float64)

    def _rows_cols_from_x(x_vec: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        # Convert a lapjv-style x vector into (rows, cols) index arrays,
        # dropping unassigned (-1) rows.
        if x_vec.size == 0:
            return np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64)
        mask = x_vec >= 0
        rows = np.nonzero(mask)[0]
        cols = x_vec[mask]
        return rows.astype(np.int64, copy=False), cols.astype(np.int64, copy=False)

    if not extend:
        # Square: call solver directly on chosen dtype, but compute total
        # from the ORIGINAL array.
        x_raw_obj, y_raw_obj = _kernel(work)
        # Convert only what's needed; y conversion deferred if not used.
        x_raw = np.asarray(x_raw_obj, dtype=np.int64)
        if jvx_like:
            rows, cols = _rows_cols_from_x(x_raw)
            if return_cost:
                # NOTE(review): assumes every row of a square problem is
                # assigned (x_raw >= 0); a -1 entry would index column -1.
                total = float(a[np.arange(n), x_raw].sum()) if n > 0 else 0.0
                return total, rows, cols
            else:
                return rows, cols
        else:
            y_raw = np.asarray(y_raw_obj, dtype=np.int64)
            if return_cost:
                total = float(a[np.arange(n), x_raw].sum()) if n > 0 else 0.0
                return total, x_raw, y_raw
            else:
                return x_raw, y_raw
    # Rectangular: zero-pad to square, solve, then trim back; compute total
    # from the ORIGINAL array.
    size = max(n, m)
    padded = np.empty((size, size), dtype=work.dtype)
    padded[:n, :m] = work
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    x_pad_obj, y_pad_obj = _kernel(padded)
    x_pad = np.asarray(x_pad_obj, dtype=np.int64)
    cols_pad_n = x_pad[:n]
    if jvx_like:
        if n == 0 or m == 0:
            rows = np.empty((0,), dtype=np.int64)
            cols = np.empty((0,), dtype=np.int64)
            total = 0.0
        else:
            # Keep only real matches: assigned columns inside the original rectangle.
            mask_r = (cols_pad_n >= 0) & (cols_pad_n < m)
            rows = np.nonzero(mask_r)[0].astype(np.int64, copy=False)
            cols = cols_pad_n[mask_r].astype(np.int64, copy=False)
            total = float(a[rows, cols].sum()) if (return_cost and rows.size) else 0.0
        return (total, rows, cols) if return_cost else (rows, cols)
    # lapjv-like outputs (vectorized); dummy matches map to -1.
    x_out = np.full(n, -1, dtype=np.int64)
    mask_r = (cols_pad_n >= 0) & (cols_pad_n < m)
    if mask_r.any():
        x_out[mask_r] = cols_pad_n[mask_r]
    y_pad = np.asarray(y_pad_obj, dtype=np.int64)
    rows_pad_m = y_pad[:m]
    y_out = np.full(m, -1, dtype=np.int64)
    mask_c = (rows_pad_m >= 0) & (rows_pad_m < n)
    if mask_c.any():
        y_out[mask_c] = rows_pad_m[mask_c]
    if return_cost and n > 0 and m > 0:
        mask = (x_out >= 0)
        total = float(a[np.nonzero(mask)[0], x_out[mask]].sum()) if mask.any() else 0.0
    else:
        total = 0.0
    return (total, x_out, y_out) if return_cost else (x_out, y_out)
def lapjvsa(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray],
    np.ndarray
]:
    """
    Solve LAP using the 'lapjvs' algorithm (pairs API).

    - Accepts rectangular cost matrices (pads to square internally when needed).
    - Returns pairs array (K, 2) int64 with optional total cost (computed from
      the ORIGINAL input array, not the padded/cast working copy).

    Parameters
    ----------
    cost : np.ndarray
        2D cost matrix of shape (n, m).
    extend_cost : Optional[bool]
        True  -> zero-pad rectangular input to square.
        False -> pass the input through unpadded (assumed square).
        None  -> auto-detect: pad only when n != m.
    return_cost : bool
        If True (default), return (total_cost, pairs); otherwise just pairs.
    prefer_float32 : bool
        If True (default), run the float32 kernel. The native float64 kernel is
        used only when prefer_float32 is explicitly False AND input is float64.
    """
    a = np.asarray(cost)
    if a.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n, m = a.shape
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    if not extend:
        # Square: call native pairs entry point on chosen dtype
        use_f32 = not ((prefer_float32 is False) and (a.dtype == np.float64))
        if use_f32:
            pairs_obj = _lapjvsa_float32(np.ascontiguousarray(a, dtype=np.float32))
        else:
            # Follow native dtype; if a is float32 or float64, both ok
            # Ensure contiguous to avoid hidden copies
            work = np.ascontiguousarray(a, dtype=a.dtype if a.dtype in (np.float32, np.float64) else np.float64)
            pairs_obj = _lapjvsa_native(work)
        pairs = np.asarray(pairs_obj, dtype=np.int64)
        if return_cost:
            if pairs.size:
                r = pairs[:, 0]; c = pairs[:, 1]
                # Sum from the ORIGINAL array to avoid float32 rounding drift.
                total = float(a[r, c].sum())
            else:
                total = 0.0
            return total, pairs
        return pairs
    # Rectangular: zero-pad to square, solve pairs on padded, map back, compute total from ORIGINAL
    size = max(n, m)
    use_f32 = not ((prefer_float32 is False) and (a.dtype == np.float64))
    wdtype = np.float32 if use_f32 else (a.dtype if a.dtype in (np.float32, np.float64) else np.float64)
    padded = np.empty((size, size), dtype=wdtype)
    # copy original submatrix
    padded[:n, :m] = a.astype(wdtype, copy=False)
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    # Solve pairs on the padded square matrix
    if use_f32:
        pairs_pad_obj = _lapjvsa_float32(padded)
    else:
        pairs_pad_obj = _lapjvsa_native(padded)
    pairs_pad = np.asarray(pairs_pad_obj, dtype=np.int64)
    # Trim pairs to original rectangle: matches that involve a dummy
    # (padded) row or column are dropped.
    if pairs_pad.size == 0 or n == 0 or m == 0:
        pairs = np.empty((0, 2), dtype=np.int64)
        total = 0.0
    else:
        r = pairs_pad[:, 0]
        c = pairs_pad[:, 1]
        mask = (r >= 0) & (r < n) & (c >= 0) & (c < m)
        if mask.any():
            pairs = np.stack([r[mask], c[mask]], axis=1).astype(np.int64, copy=False)
            total = float(a[pairs[:, 0], pairs[:, 1]].sum()) if return_cost else 0.0
        else:
            pairs = np.empty((0, 2), dtype=np.int64)
            total = 0.0
    return (total, pairs) if return_cost else pairs
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import os | ||
| import numpy as np | ||
| from typing import List, Tuple, Union | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from ._lapjvx import lapjvx as _lapjvx_single | ||
| from ._lapjvx import lapjvxa as _lapjvxa_single | ||
| def _normalize_threads(n_threads: int) -> int: | ||
| if not n_threads: | ||
| return max(1, int(os.cpu_count() or 1)) | ||
| return max(1, int(n_threads)) | ||
def lapjvx_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
           Tuple[List[np.ndarray], List[np.ndarray]]]:
    """Solve a batch of LAPs with the lapjvx backend using a thread pool.

    costs is a (B, N, M) stack; extend_cost/cost_limit are forwarded to the
    single-instance solver.  Returns (totals, rows_list, cols_list) when
    return_cost, else (rows_list, cols_list).
    """
    batch = np.asarray(costs)
    if batch.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    n_items = batch.shape[0]
    n_workers = _normalize_threads(n_threads)
    totals = np.empty((n_items,), dtype=np.float64) if return_cost else None
    rows_out: List[np.ndarray] = [None] * n_items  # type: ignore
    cols_out: List[np.ndarray] = [None] * n_items  # type: ignore

    def _task(bi: int):
        res = _lapjvx_single(
            batch[bi], extend_cost=extend_cost,
            cost_limit=cost_limit, return_cost=return_cost,
        )
        if return_cost:
            total, rows, cols = res
            return bi, total, rows, cols
        rows, cols = res
        return bi, None, rows, cols

    def _store(result) -> None:
        bi, total, rows, cols = result
        if return_cost:
            totals[bi] = float(total)  # type: ignore
        rows_out[bi] = np.asarray(rows, dtype=np.int64)
        cols_out[bi] = np.asarray(cols, dtype=np.int64)

    if n_workers == 1 or n_items == 1:
        for bi in range(n_items):
            _store(_task(bi))
    else:
        with ThreadPoolExecutor(max_workers=n_workers) as pool:
            pending = [pool.submit(_task, bi) for bi in range(n_items)]
            for fut in as_completed(pending):
                _store(fut.result())

    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_out, cols_out  # type: ignore
    return rows_out, cols_out
def lapjvxa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """Solve a batch of LAPs with the lapjvx pairs backend using a thread pool.

    Returns (totals, pairs_list) when return_cost, else pairs_list, where each
    pairs entry is a (K_b, 2) int64 array of matched (row, col) indices.
    """
    batch = np.asarray(costs)
    if batch.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    n_items = batch.shape[0]
    n_workers = _normalize_threads(n_threads)
    totals = np.empty((n_items,), dtype=np.float64) if return_cost else None
    pairs_out: List[np.ndarray] = [None] * n_items  # type: ignore

    def _task(bi: int):
        res = _lapjvxa_single(
            batch[bi], extend_cost=extend_cost,
            cost_limit=cost_limit, return_cost=return_cost,
        )
        if return_cost:
            total, pairs = res
            return bi, total, pairs
        return bi, None, res

    def _store(result) -> None:
        bi, total, pairs = result
        if return_cost:
            totals[bi] = float(total)  # type: ignore
        pairs_out[bi] = np.asarray(pairs, dtype=np.int64)

    if n_workers == 1 or n_items == 1:
        for bi in range(n_items):
            _store(_task(bi))
    else:
        with ThreadPoolExecutor(max_workers=n_workers) as pool:
            pending = [pool.submit(_task, bi) for bi in range(n_items)]
            for fut in as_completed(pending):
                _store(fut.result())

    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_out  # type: ignore
    return pairs_out
-342
| # Copyright (c) 2012-2025 Tomas Kazmar | BSD-2-Clause License | ||
| import numpy as np | ||
| from bisect import bisect_left | ||
| # import logging | ||
| from ._lapjv import _lapmod, FP_DYNAMIC_ as FP_DYNAMIC, LARGE_ as LARGE | ||
def _pycrrt(n, cc, ii, kk, free_rows, x, y, v):
    """Column reduction and reduction transfer (pure-Python reference).

    Initializes the dual prices v, the row->col assignment x and the
    col->row assignment y for the sparse LAP given in CSR-like form
    (cc: values, ii: row start offsets, kk: sorted column indices).
    Indices of rows left unassigned are written to free_rows; their
    count is returned.
    """
    x[:] = -1
    y[:] = -1
    v[:] = LARGE
    # Column reduction: v[j] ends up as the column minimum and y[j] as a row
    # achieving it (later rows overwrite on strict improvement only).
    for i in range(n):
        ks = slice(ii[i], ii[i+1])
        js = kk[ks]
        ccs = cc[ks]
        mask = ccs < v[js]
        js = js[mask]
        v[js] = ccs[mask]
        y[js] = i
    unique = np.empty((n,), dtype=bool)
    unique[:] = True
    # Walk columns backwards so each minimizing row claims at most one column.
    for j in range(n-1, -1, -1):
        i = y[j]
        # If row is not taken yet, initialize it with the minimum stored in y.
        if x[i] < 0:
            x[i] = j
        else:
            # Row already holds another column: it is not uniquely assigned,
            # and column j goes back to the free pool.
            unique[i] = False
            y[j] = -1
    n_free_rows = 0
    for i in range(n):
        # Store unassigned row i.
        if x[i] < 0:
            free_rows[n_free_rows] = i
            n_free_rows += 1
        elif unique[i] and ii[i+1] - ii[i] > 1:
            # >1 check prevents choking on rows with a single entry
            # Reduction transfer from an assigned row: lower the price of its
            # assigned column by the row's second-smallest reduced cost.
            j = x[i]
            # Find the current 2nd minimum of the reduced column costs:
            # (cost[i,j] - v[j]) for some j.
            ks = slice(ii[i], ii[i+1])
            js = kk[ks]
            minv = np.min(cc[ks][js != j] - v[js][js != j])
            v[j] -= minv
    return n_free_rows
def find_minima(indices, values):
    """Return the two smallest entries of `values` with their column indices.

    Returns (j1, v1, j2, v2) where v1 <= v2 are the smallest and second
    smallest values and j1/j2 the corresponding entries of `indices`.
    j2 is -1 (and v2 is LARGE) when no second minimum has been seen.
    """
    if len(indices) > 0:
        best_j, best_v = indices[0], values[0]
    else:
        best_j, best_v = 0, LARGE
    second_j, second_v = -1, LARGE
    for j, h in zip(indices[1:], values[1:]):
        if h >= second_v:
            continue
        if h < best_v:
            # New overall minimum: the previous best becomes the runner-up.
            second_j, second_v = best_j, best_v
            best_j, best_v = j, h
        else:
            second_j, second_v = j, h
    return best_j, best_v, second_j, second_v
def _pyarr(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augmenting row reduction (pure-Python reference).

    Attempts to assign each currently free row to the column with the
    smallest reduced cost, adjusting dual prices; rows that remain
    contested are written back into free_rows.  Returns the number of
    rows still free afterwards.
    """
    current = 0
    new_free_rows = 0
    while current < n_free_rows:
        free_i = free_rows[current]
        current += 1
        ks = slice(ii[free_i], ii[free_i+1])
        js = kk[ks]
        # Two smallest reduced costs in this row and their columns.
        j1, v1, j2, v2 = find_minima(js, cc[ks] - v[js])
        i0 = y[j1]
        v1_new = v[j1] - (v2 - v1)
        v1_lowers = v1_new < v[j1]
        if v1_lowers:
            # Price at j1 can strictly drop: take j1 at the lowered price.
            v[j1] = v1_new
        elif i0 >= 0 and j2 != -1:  # i0 is assigned, try j2
            j1 = j2
            i0 = y[j2]
        x[free_i] = j1
        y[j1] = free_i
        if i0 >= 0:
            # We displaced row i0 from column j1.
            if v1_lowers:
                # Continue the augmenting chain with i0 immediately.
                current -= 1
                free_rows[current] = i0
            else:
                # Stop the chain here and keep i0 for the next sweep.
                free_rows[new_free_rows] = i0
                new_free_rows += 1
    return new_free_rows
def binary_search(data, key):
    """Locate `key` in the sorted sequence `data`.

    Returns the index of `key` if present, otherwise None.
    """
    pos = bisect_left(data, key)
    return pos if pos < len(data) and data[pos] == key else None
def _find(hi, d, cols, y):
    """Gather all columns whose tentative distance equals the running minimum.

    Scans cols[hi:] and swaps every column attaining the minimum of d to the
    front of that region.  Returns (minv, new_hi, cols); cols[old_hi:new_hi]
    become the new SCAN columns.  `y` is unused here (kept for signature
    parity with the other search helpers).
    """
    lo, hi = hi, hi + 1
    minv = d[cols[lo]]
    # XXX: anytime this happens to be NaN, i'm screwed...
    # assert not np.isnan(minv)
    for k in range(hi, len(cols)):
        j = cols[k]
        if d[j] <= minv:
            # New minimum found, trash the new SCAN columns found so far.
            if d[j] < minv:
                hi = lo
                minv = d[j]
            # Swap column j into the SCAN region (j == cols[k] here).
            cols[k], cols[hi] = cols[hi], j
            hi += 1
    return minv, hi, cols
def _scan(n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v):
    """Scan step of the shortest augmenting path search.

    Processes SCAN columns cols[lo:hi]; for each, relaxes the tentative
    distances d of TODO columns cols[hi:n] through the row currently
    assigned to the scanned column.  Returns (final_j, lo, hi, d, cols, pred):
    final_j >= 0 signals an unassigned column reachable at distance minv
    (search can stop, lo/hi returned as None); final_j == -1 means all SCAN
    columns are exhausted.
    """
    # Scan all TODO columns.
    while lo != hi:
        j = cols[lo]
        lo += 1
        i = y[j]
        # Rows store only their finite entries, so (i, j) may be absent;
        # locate it by binary search in the row's sorted column list.
        kj = binary_search(kk[ii[i]:ii[i+1]], j)
        if kj is None:
            continue
        kj = ii[i] + kj
        h = cc[kj] - v[j] - minv
        for k in range(hi, n):
            j = cols[k]
            kj = binary_search(kk[ii[i]:ii[i+1]], j)
            if kj is None:
                continue
            kj = ii[i] + kj
            cred_ij = cc[kj] - v[j] - h
            if cred_ij < d[j]:
                d[j] = cred_ij
                pred[j] = i
                if cred_ij == minv:
                    if y[j] < 0:
                        # Unassigned column at minimal distance: path complete.
                        return j, None, None, d, cols, pred
                    # Promote column j from TODO to SCAN.
                    cols[k] = cols[hi]
                    cols[hi] = j
                    hi += 1
    return -1, lo, hi, d, cols, pred
def find_path(n, cc, ii, kk, start_i, y, v):
    """Find a shortest augmenting path starting from free row start_i.

    Dijkstra-like search over columns.  Returns (final_j, pred): the
    unassigned column that terminates the path and the predecessor-row
    array used to backtrack it.  Dual prices v of READY columns are
    updated in place.
    """
    cols = np.arange(n, dtype=int)
    pred = np.empty((n,), dtype=int)
    pred[:] = start_i
    d = np.empty((n,), dtype=float)
    d[:] = LARGE
    # Seed distances with the reduced costs of start_i's stored entries.
    ks = slice(ii[start_i], ii[start_i+1])
    js = kk[ks]
    d[js] = cc[ks] - v[js]
    minv = LARGE
    lo, hi = 0, 0
    n_ready = 0
    final_j = -1
    while final_j == -1:
        # No SCAN columns, find new ones.
        if lo == hi:
            n_ready = lo
            minv, hi, cols = _find(hi, d, cols, y)
            for h in range(lo, hi):
                # If any of the new SCAN columns is unassigned, use it.
                if y[cols[h]] < 0:
                    final_j = cols[h]
        if final_j == -1:
            final_j, lo, hi, d, cols, pred = _scan(
                n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v)
    # Update prices for READY columns.
    for k in range(n_ready):
        j0 = cols[k]
        v[j0] += d[j0] - minv
    assert final_j >= 0
    assert final_j < n
    return final_j, pred
def _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augment the solution for every remaining free row.

    For each free row, finds a shortest augmenting path and flips the
    assignments along it, updating x and y in place.
    """
    for free_i in free_rows[:n_free_rows]:
        j, pred = find_path(n, cc, ii, kk, free_i, y, v)
        # Augment the path starting from column j and backtracking to free_i.
        i = -1
        while i != free_i:
            i = pred[j]
            assert i >= 0
            assert i < n
            y[j] = i
            # Row i takes column j; continue with the column i held before.
            j, x[i] = x[i], j
def check_cost(n, cc, ii, kk):
    """Validate a sparse cost matrix, raising ValueError on malformed input.

    Requires at least one row and one stored entry, non-negative values,
    and all values strictly below the LARGE sentinel.
    """
    if n == 0:
        raise ValueError('Cost matrix has zero rows.')
    if len(kk) == 0:
        raise ValueError('Cost matrix has zero columns.')
    if cc.min() < 0:
        raise ValueError('Cost matrix values must be non-negative.')
    if cc.max() >= LARGE:
        raise ValueError(
            'Cost matrix values must be less than %s' % LARGE)
def get_cost(n, cc, ii, kk, x0):
    """Total cost of assignment x0 (row i -> column x0[i]).

    Returns np.inf if any assigned pair is not a stored (finite) entry of
    the sparse matrix.
    """
    total = 0
    for i, j in enumerate(x0):
        kj = binary_search(kk[ii[i]:ii[i + 1]], j)
        if kj is None:
            return np.inf
        total += cc[ii[i] + kj]
    return total
def lapmod(n, cc, ii, kk, fast=True, return_cost=True, fp_version=FP_DYNAMIC):
    """Solve sparse linear assignment problem using Jonker-Volgenant algorithm.

    Parameters
    ----------
    n : int
        Number of rows of the assignment cost matrix.
    cc : 1D array
        All finite elements of the assignment cost matrix.
    ii : 1D array
        Indices of the row starts in cc. The following must hold:
        ii[0] == 0 and ii[n] == len(cc).
        (Fixed: previously documented as ii[n+1] == len(cc), off by one —
        the code reads row i via cc[ii[i]:ii[i+1]] for i in range(n).)
    kk : 1D array
        Column indices so that cost[i, kk[ii[i] + k]] == cc[ii[i] + k].
        Indices within one row must be sorted.
    fast : bool
        If True (default), use the compiled _lapmod kernel; otherwise run
        the pure-Python reference implementation (_pycrrt/_pyarr/_pya).
    return_cost : bool
        Whether or not to return the assignment cost.
    fp_version
        Forwarded to the compiled kernel (fast path only).

    Returns
    -------
    (opt, x, y) where opt is the assignment cost, x the columns assigned to
    rows and y the rows assigned to columns; or (x, y) if return_cost is not
    True.

    Note: this function takes no ``extend_cost``/``cost_limit`` parameters;
    the docstring previously described them in error (they belong to the
    dense lapjv API).
    """
    check_cost(n, cc, ii, kk)
    if fast is True:
        # Compiled kernel does the whole solve.
        x, y = _lapmod(n, cc, ii, kk, fp_version=fp_version)
    else:
        # Pure-Python reference path: normalize buffers for the helpers.
        cc = np.ascontiguousarray(cc, dtype=np.float64)
        ii = np.ascontiguousarray(ii, dtype=np.int32)
        kk = np.ascontiguousarray(kk, dtype=np.int32)
        x = np.empty((n,), dtype=np.int32)
        y = np.empty((n,), dtype=np.int32)
        v = np.empty((n,), dtype=np.float64)
        free_rows = np.empty((n,), dtype=np.int32)
        # Column reduction & reduction transfer.
        n_free_rows = _pycrrt(n, cc, ii, kk, free_rows, x, y, v)
        if n_free_rows == 0:
            # Reduction already produced a complete assignment.
            if return_cost is True:
                return get_cost(n, cc, ii, kk, x), x, y
            else:
                return x, y
        for it in range(2):
            # Augmenting row reduction, at most two sweeps.
            n_free_rows = _pyarr(
                n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
            if n_free_rows == 0:
                if return_cost is True:
                    return get_cost(n, cc, ii, kk, x), x, y
                else:
                    return x, y
        # Shortest augmenting paths for the remaining free rows.
        _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
    if return_cost is True:
        return get_cost(n, cc, ii, kk, x), x, y
    else:
        return x, y
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
1659271
10.62%37
8.82%1433
48.19%