Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a Demo · Sign in
Socket

lapx

Package Overview
Dependencies
Maintainers
1
Versions
28
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

lapx - pypi Package Compare versions

Comparing version
0.8.1
to
0.9.0
+67
lap/_lapjv_wp.py
# Copyright (c) 2025 Ratha SIV | MIT License
import numpy as np
from typing import Tuple, Union
from ._lapjv import lapjv as _lapjv
def lapjv(
    cost: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Jonker-Volgenant LAP solver returning lapjv-style mapping vectors.

    The call is forwarded untouched to the Cython kernel, which yields:
      - x[i]: column assigned to row i, or -1 when row i is unassigned.
      - y[j]: row assigned to column j, or -1 when column j is unassigned.

    Parameters
    ----------
    cost : np.ndarray, shape (N, M)
        2D cost matrix; cost[i, j] prices assigning row i to column j. Any
        float dtype is accepted; a single contiguous float64 buffer is built
        internally when required.
    extend_cost : bool, default False
        Allow rectangular input by zero-padding to a square matrix (see the
        augmentation policy below).
    cost_limit : float, default np.inf
        A finite value triggers augmentation to size (N+M) using sentinel
        edges of cost_limit/2 plus a zero bottom-right block. This models a
        per-edge "reject" price and also permits rectangular input even when
        extend_cost is False.
    return_cost : bool, default True
        Prepend the total assignment cost, accumulated over the ORIGINAL
        (un-augmented, un-padded) input array.

    Returns
    -------
    (total_cost, x, y) when return_cost is True, otherwise (x, y), where
    x is int32 of shape (N,) and y is int32 of shape (M,), each using -1
    for unassigned entries.

    Notes
    -----
    - Augmentation policy: finite cost_limit -> augment to (N+M);
      else rectangular input or extend_cost=True -> zero-pad to max(N, M);
      else the square matrix is solved as given.
    - Orientation is normalized internally (kernel sees rows <= cols) and
      results are mapped back to the ORIGINAL orientation.
    - Zero-sized dimensions produce 0.0 (when requested) and all -1 vectors.
    - Dtypes are not altered by this wrapper; it forwards directly.
    """
    return _lapjv(cost, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=return_cost)
# Copyright (c) 2025 Ratha SIV | MIT License
import numpy as np
from typing import Tuple, Union
from ._lapjvc import lapjvc as _lapjvc # type: ignore
def lapjvc(
    cost: np.ndarray,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Classic dense Jonker-Volgenant LAP solver (thin C++ binding wrapper).

    Computes an optimal assignment for a 2D cost matrix and reports it as
    row/column index arrays ordered like scipy.optimize.linear_sum_assignment.

    Parameters
    ----------
    cost : np.ndarray, shape (M, N)
        Cost matrix. Supported dtypes: int32, int64, float32, float64.
        Rectangular inputs are padded internally by the dense solver, and
        NaN entries (float dtypes) mark forbidden assignments.
    return_cost : bool, default True
        When True the result tuple starts with the summed cost over the
        selected (row, col) pairs.

    Returns
    -------
    (total_cost, row_indices, col_indices) when return_cost is True,
    otherwise (row_indices, col_indices). Index arrays have shape (K,) and
    int64-style dtype (platform-dependent via NumPy).

    Notes
    -----
    - For very large, sparse, or otherwise structured problems, prefer the
      lapjv/lapjvx variants optimized for those cases.
    - Forbidden assignments can be encoded with np.nan (float inputs).
    """
    return _lapjvc(cost, return_cost=return_cost)
# Copyright (c) 2025 Ratha SIV | MIT License
import os
import numpy as np
from typing import List, Tuple, Union
from concurrent.futures import ThreadPoolExecutor, as_completed
from ._lapjvs_wp import lapjvs as _lapjvs_single
from ._lapjvs_wp import lapjvsa as _lapjvsa_single
def _normalize_threads(n_threads: int) -> int:
if n_threads is None or n_threads == 0:
cpu = os.cpu_count() or 1
return max(1, int(cpu))
return max(1, int(n_threads))
def _solve_one_jvs(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    jvx_like: bool,
    return_cost: bool,
):
    """Solve a single lapjvs instance via the proven single-instance wrapper.

    The wrapped kernel releases the GIL internally, so worker threads in the
    batch drivers can make real progress in parallel.
    """
    return _lapjvs_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        jvx_like=jvx_like,
        prefer_float32=prefer_float32,
    )
def _solve_one_jvsa(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    return_cost: bool,
):
    """Solve a single lapjvsa instance through the single-instance wrapper."""
    return _lapjvsa_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        prefer_float32=prefer_float32,
    )
def lapjvs_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """
    Batched lapjvs solver with a thread pool.
    For each 2D cost matrix in a 3D batch, this function runs `lapjvs` and
    aggregates the per-instance results. It preserves the order of the batch
    in the outputs.
    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices (float32/float64). Each slice `costs[b]` is
        a single LAP instance.
    extend_cost : bool, default False
        If True, rectangular instances are solved via internal zero-padding.
        If False, each instance must be square or a ValueError is raised.
        This is forwarded to the single-instance solver.
    return_cost : bool, default True
        If True, return per-instance totals as the first output.
    n_threads : int, default 0
        Number of worker threads. When 0 or None, uses `os.cpu_count()`.
        Actual workers are capped to the batch size.
    prefer_float32 : bool, default True
        Hint to run each kernel in float32 (forwarded to the single solver).
    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
            Total assignment cost for each instance, computed from the ORIGINAL
            per-instance cost matrix.
        rows_list : List[np.ndarray[int64]]
            For each b, a 1D array of assigned row indices (length K_b).
        cols_list : List[np.ndarray[int64]]
            For each b, a 1D array of assigned col indices (length K_b).
    Else:
        rows_list, cols_list
    Raises
    ------
    ValueError
        - If `costs` is not a 3D array.
        - If any instance is rectangular while `extend_cost=False`.
    Notes
    -----
    - An empty batch (B == 0) returns empty outputs instead of raising.
    - Threading: the underlying native kernel releases the GIL, so multiple
      threads can accelerate large batches on multi-core systems.
    - Dtypes: each instance may be float32 or float64; kernel selection and
      casting follow the single-instance rules. Totals are float64.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    # Empty batch: return early. Previously B == 0 reached the threaded branch
    # and ThreadPoolExecutor(max_workers=0) raised ValueError.
    if B == 0:
        if return_cost:
            return np.empty((0,), dtype=np.float64), [], []
        return [], []
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve instance bi; returns (bi, total-or-None, rows, cols) so the
        # caller can reassemble results in batch order.
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _solve_one_jvs(  # type: ignore
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
                jvx_like=True, return_cost=True
            )
            return bi, total, rows, cols
        rows, cols = _solve_one_jvs(  # type: ignore
            a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
            jvx_like=True, return_cost=False
        )
        return bi, None, rows, cols

    if threads == 1 or B == 1:
        # Serial path avoids pool overhead for trivial workloads.
        for bi in range(B):
            idx, t, r, c = work(bi)
            if return_cost:
                totals[idx] = float(t)  # type: ignore
            rows_list[idx] = np.asarray(r, dtype=np.int64)
            cols_list[idx] = np.asarray(c, dtype=np.int64)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, r, c = fut.result()
                if return_cost:
                    totals[idx] = float(t)  # type: ignore
                rows_list[idx] = np.asarray(r, dtype=np.int64)
                cols_list[idx] = np.asarray(c, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    return rows_list, cols_list
def lapjvsa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """
    Batched lapjvsa solver, returning (K_b, 2) arrays per instance.
    Runs `lapjvsa` on each (N, M) slice of a (B, N, M) batch and aggregates
    the results while preserving order.
    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices.
    extend_cost : bool, default False
        If True, rectangular instances are solved via internal zero-padding.
    return_cost : bool, default True
        If True, include per-instance totals as the first returned array.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`.
        Actual workers are capped to the batch size.
    prefer_float32 : bool, default True
        Forwarded to the single-instance solver.
    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
        pairs_list : List[np.ndarray[int64] with shape (K_b, 2)]
    Else:
        pairs_list
    Raises
    ------
    ValueError
        If `costs` is not a 3D array, or if any instance is rectangular while
        `extend_cost=False`.
    Notes
    -----
    - An empty batch (B == 0) returns empty outputs instead of raising.
    - See `lapjvsa` for details on dtype handling and total-cost accumulation.
    - Results are reassembled in batch order irrespective of threading.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    # Empty batch: return early. Previously B == 0 reached the threaded branch
    # and ThreadPoolExecutor(max_workers=0) raised ValueError.
    if B == 0:
        return (np.empty((0,), dtype=np.float64), []) if return_cost else []
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve instance bi; returns (bi, total-or-None, pairs).
        a2d = A[bi]
        if return_cost:
            total, pairs = _solve_one_jvsa(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=True
            )
            return bi, total, pairs
        pairs = _solve_one_jvsa(
            a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=False
        )
        return bi, None, pairs

    if threads == 1 or B == 1:
        # Serial path avoids pool overhead for trivial workloads.
        for bi in range(B):
            idx, t, P = work(bi)
            if return_cost:
                totals[idx] = float(t)  # type: ignore
            pairs_list[idx] = np.asarray(P, dtype=np.int64)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, P = fut.result()
                if return_cost:
                    totals[idx] = float(t)  # type: ignore
                pairs_list[idx] = np.asarray(P, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
# Copyright (c) 2025 Ratha SIV | MIT License
import numpy as np
from typing import Optional, Tuple, Union
from ._lapjvs import lapjvs_native as _lapjvs_native # type: ignore
from ._lapjvs import lapjvs_float32 as _lapjvs_float32 # type: ignore
from ._lapjvs import lapjvsa_native as _lapjvsa_native # type: ignore
from ._lapjvs import lapjvsa_float32 as _lapjvsa_float32 # type: ignore
def lapjvs(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    jvx_like: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray]
]:
    """
    This function wraps a high-performance JV solver and provides flexible
    I/O to match either lapjv-style vector outputs (x, y) or lapjvx/SciPy-style
    pair lists (rows, cols). It handles rectangular inputs by zero-padding to a
    square matrix internally when requested.
    Parameters
    ----------
    cost : np.ndarray, shape (n, m)
        The cost matrix. Must be 2D and a real floating dtype. Values are treated
        as minimization costs. Rectangular matrices are supported via internal
        zero-padding when `extend_cost=True` or `extend_cost=None and n != m`.
    extend_cost : Optional[bool], default None
        Controls how rectangular inputs are handled:
        - True: Always zero-pad to a square internally (if needed).
        - False: Require a square matrix, otherwise raise ValueError.
        - None: Auto mode; pad iff the input is rectangular.
    return_cost : bool, default True
        If True, include the total assignment cost as the first return value.
        The total is always recomputed from the ORIGINAL input array `cost`
        (float64 accumulation) to match previous numeric behavior.
    jvx_like : bool, default True
        Selects the output format.
        - True: Return lapjvx/SciPy-style indexing arrays:
            return_cost=True  -> (total_cost: float, rows: (k,), cols: (k,))
            return_cost=False -> (rows: (k,), cols: (k,))
          Here, `rows[i]` is assigned to `cols[i]`.
        - False: Return lapjv-style mapping vectors:
            return_cost=True  -> (total_cost: float, x: (n0,), y: (m0,))
            return_cost=False -> (x: (n0,), y: (m0,))
          `x[i]` gives the assigned column for row i or -1 if unassigned.
          `y[j]` gives the assigned row for column j or -1 if unassigned.
    prefer_float32 : bool, default True
        When True, the solver kernel runs in float32 to reduce memory bandwidth
        and improve speed. When False and the input is float64, the kernel runs
        in float64. Regardless of kernel dtype, the returned total cost is
        recomputed against the ORIGINAL `cost` array.
    Returns
    -------
    See `jvx_like` and `return_cost` above for exact signatures. In all cases,
    index arrays are int64 and refer to indices in the ORIGINAL orientation of
    `cost` (not the internally transposed one).
    Raises
    ------
    ValueError
        - If `cost` is not a 2D array.
        - If `extend_cost=False` and the input matrix is rectangular.
    Notes
    -----
    - Rectangular handling:
        Internally, the solver normalizes orientation so that the working matrix
        has rows <= cols. Rectangular problems are modeled by zero-padding on
        the right and/or bottom to become square. Only assignments within the
        original (n, m) region are returned and used for the total.
    - Dtype:
        The kernel may operate in float32 or float64, but accumulation for the
        returned total cost is performed in float64 on the ORIGINAL `cost`.
    """
    # Keep the original array to compute the final cost from it (preserves previous behavior)
    A = np.asarray(cost)
    if A.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n0, m0 = A.shape
    transposed = False
    # Normalize orientation for performance: let the kernel see rows <= cols.
    if n0 > m0:
        B = np.ascontiguousarray(A.T)
        transposed = True
    else:
        B = np.ascontiguousarray(A)
    n, m = B.shape
    # Auto mode (None): pad iff the working matrix is rectangular.
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    # Choose backend and working dtype for the solver only.
    # float32 kernel is used unless the caller explicitly opted out AND the
    # input already is float64 (so no precision is silently dropped).
    use_float32_kernel = not ((prefer_float32 is False) and (B.dtype == np.float64))
    if use_float32_kernel:
        _kernel = _lapjvs_float32
        work_base = np.ascontiguousarray(B, dtype=np.float32)
    else:
        _kernel = _lapjvs_native
        work_base = np.ascontiguousarray(B, dtype=np.float64)
    def _rows_cols_from_x(x_vec: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        # Convert a kernel x-vector (row -> col, -1 = unassigned) into
        # (rows, cols) index pairs in the ORIGINAL orientation of A.
        if x_vec.size == 0:
            return np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64)
        mask = x_vec >= 0
        rows_b = np.nonzero(mask)[0].astype(np.int64, copy=False)
        cols_b = x_vec[mask].astype(np.int64, copy=False)
        if not transposed:
            return rows_b, cols_b
        # Map back to original orientation (A): swap row/col
        return cols_b, rows_b
    if not extend:
        # Square: call solver directly on chosen dtype, compute total from ORIGINAL A
        if n != m:
            # Guard (per docstring): if extend_cost=False, require square input
            raise ValueError("extend_cost=False requires a square cost matrix")
        x_raw_obj, y_raw_obj = _kernel(work_base)
        x_raw_b = np.asarray(x_raw_obj, dtype=np.int64)
        if jvx_like:
            rows_a, cols_a = _rows_cols_from_x(x_raw_b)
            if return_cost:
                # Total accumulated in float64 against the original array.
                total = float(A[rows_a, cols_a].sum()) if rows_a.size else 0.0
                return total, rows_a, cols_a
            else:
                return rows_a, cols_a
        else:
            # Return vectors (x, y) in original orientation
            y_raw_b = np.asarray(y_raw_obj, dtype=np.int64)
            if not transposed:
                if return_cost:
                    total = float(A[np.arange(n), x_raw_b].sum()) if n > 0 else 0.0
                    return total, x_raw_b, y_raw_b
                else:
                    return x_raw_b, y_raw_b
            # transposed square should not happen (n0==m0 implies no transpose), but keep safe mapping
            # Build pairs from B then map to A vectors
            rows_a, cols_a = _rows_cols_from_x(x_raw_b)
            x_out = np.full(n0, -1, dtype=np.int64)
            if rows_a.size:
                x_out[rows_a] = cols_a
            y_out = np.full(m0, -1, dtype=np.int64)
            if rows_a.size:
                y_out[cols_a] = rows_a
            if return_cost:
                total = float(A[rows_a, cols_a].sum()) if rows_a.size else 0.0
                return total, x_out, y_out
            else:
                return x_out, y_out
    # Rectangular: zero-pad to square (in B space), solve, map back; compute total from ORIGINAL A
    size = max(n, m)
    padded = np.empty((size, size), dtype=work_base.dtype)
    # copy original submatrix
    padded[:n, :m] = work_base
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    x_pad_obj, y_pad_obj = _kernel(padded)
    x_pad_b = np.asarray(x_pad_obj, dtype=np.int64)
    # Trim to original rectangle (B space), then map to A space if needed
    cols_pad_n = x_pad_b[:n]
    # Keep only real assignments: drop padding columns (>= m) and -1 entries.
    mask_r_b = (cols_pad_n >= 0) & (cols_pad_n < m)
    # Prepare pairs in A-space for convenience
    if mask_r_b.any():
        rows_b = np.nonzero(mask_r_b)[0].astype(np.int64, copy=False)
        cols_b = cols_pad_n[mask_r_b].astype(np.int64, copy=False)
        if transposed:
            rows_a = cols_b
            cols_a = rows_b
        else:
            rows_a = rows_b
            cols_a = cols_b
    else:
        rows_a = np.empty((0,), dtype=np.int64)
        cols_a = np.empty((0,), dtype=np.int64)
    if jvx_like:
        total = float(A[rows_a, cols_a].sum()) if (return_cost and rows_a.size) else 0.0
        return (total, rows_a, cols_a) if return_cost else (rows_a, cols_a)
    # lapjv-like outputs (vectorized) in ORIGINAL orientation
    x_out = np.full(n0, -1, dtype=np.int64)
    y_out = np.full(m0, -1, dtype=np.int64)
    if rows_a.size:
        x_out[rows_a] = cols_a
        y_out[cols_a] = rows_a
    if return_cost and rows_a.size:
        total = float(A[rows_a, cols_a].sum())
    else:
        total = 0.0
    return (total, x_out, y_out) if return_cost else (x_out, y_out)
def lapjvsa(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray],
    np.ndarray
]:
    """
    This variant returns a compact pairs array of shape (K, 2), where each row
    is a (row_index, col_index) assignment in the ORIGINAL orientation of the
    input matrix. Rectangular inputs are handled by internal zero-padding if
    requested.
    Parameters
    ----------
    cost : np.ndarray, shape (n, m)
        Cost matrix (float32/float64). Must be 2D.
    extend_cost : Optional[bool], default None
        Rectangular handling:
        - True: Zero-pad to square internally (if needed).
        - False: Require square, else raise ValueError.
        - None: Auto; pad iff rectangular.
    return_cost : bool, default True
        If True, include the total cost as the first return value. The total is
        computed from the ORIGINAL input matrix.
    prefer_float32 : bool, default True
        Hint to run the solver kernel in float32 for performance. When False and
        the input is float64, the kernel uses float64.
    Returns
    -------
    If return_cost is True:
        (total_cost: float, pairs: np.ndarray[int64] with shape (K, 2))
    Else:
        pairs: np.ndarray[int64] with shape (K, 2)
    Raises
    ------
    ValueError
        If `cost` is not 2D, or if `extend_cost=False` and `cost` is rectangular.
    Notes
    -----
    - Orientation is normalized internally so the kernel sees rows <= cols.
      Returned pairs are always mapped back to the ORIGINAL orientation.
    - Pairs only include assignments within the original (n, m) region for
      rectangular inputs.
    - Total is accumulated in float64 from the ORIGINAL `cost`.
    """
    A = np.asarray(cost)
    if A.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n0, m0 = A.shape
    transposed = False
    # Normalize orientation for performance
    if n0 > m0:
        B = np.ascontiguousarray(A.T)
        transposed = True
    else:
        B = np.ascontiguousarray(A)
    n, m = B.shape
    # Auto mode (None): pad iff the working matrix is rectangular.
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    # Select dtype/backend: float32 kernel unless the caller opted out AND the
    # input is float64. Non-float dtypes fall back to float64 in that case.
    use_f32 = not ((prefer_float32 is False) and (B.dtype == np.float64))
    wdtype = np.float32 if use_f32 else (B.dtype if B.dtype in (np.float32, np.float64) else np.float64)
    if not extend:
        if n != m:
            raise ValueError("extend_cost=False requires a square cost matrix")
        work = np.ascontiguousarray(B, dtype=wdtype)
        pairs_b_obj = (_lapjvsa_float32(work) if use_f32 else _lapjvsa_native(work))
        pairs_b = np.asarray(pairs_b_obj, dtype=np.int64)
        # Map pairs back to original orientation
        if transposed and pairs_b.size:
            # Swap (row, col) columns to undo the internal transpose.
            pairs_a = pairs_b[:, ::-1].astype(np.int64, copy=False)
        else:
            pairs_a = pairs_b
        if return_cost:
            if pairs_a.size:
                r = pairs_a[:, 0]; c = pairs_a[:, 1]
                # Total accumulated against the ORIGINAL array in float64.
                total = float(A[r, c].sum())
            else:
                total = 0.0
            return total, pairs_a
        return pairs_a
    # Rectangular: zero-pad in B space, solve, trim, map back to A
    size = max(n, m)
    padded = np.empty((size, size), dtype=wdtype)
    padded[:n, :m] = B.astype(wdtype, copy=False)
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    pairs_pad_b_obj = (_lapjvsa_float32(padded) if use_f32 else _lapjvsa_native(padded))
    pairs_pad_b = np.asarray(pairs_pad_b_obj, dtype=np.int64)
    if pairs_pad_b.size == 0 or n == 0 or m == 0:
        # Degenerate: no assignments at all.
        pairs_a = np.empty((0, 2), dtype=np.int64)
        total = 0.0
    else:
        r_b = pairs_pad_b[:, 0]
        c_b = pairs_pad_b[:, 1]
        # Keep only pairs that land inside the original (n, m) region.
        mask_b = (r_b >= 0) & (r_b < n) & (c_b >= 0) & (c_b < m)
        if mask_b.any():
            pairs_b = np.stack([r_b[mask_b], c_b[mask_b]], axis=1).astype(np.int64, copy=False)
            # Map back to A orientation if needed
            pairs_a = pairs_b[:, ::-1] if transposed else pairs_b
            if return_cost and pairs_a.size:
                total = float(A[pairs_a[:, 0], pairs_a[:, 1]].sum())
            else:
                total = 0.0
        else:
            pairs_a = np.empty((0, 2), dtype=np.int64)
            total = 0.0
    return (total, pairs_a) if return_cost else pairs_a
# Copyright (c) 2025 Ratha SIV | MIT License
import os
import numpy as np
from typing import List, Tuple, Union
from concurrent.futures import ThreadPoolExecutor, as_completed
from ._lapjvx import lapjvx as _lapjvx_single # type: ignore
from ._lapjvx import lapjvxa as _lapjvxa_single # type: ignore
def _normalize_threads(n_threads: int) -> int:
if not n_threads:
return max(1, int(os.cpu_count() or 1))
return max(1, int(n_threads))
def lapjvx_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """
    Batched lapjvx solver with a thread pool.
    This function applies a JVX-style solver (`lapjvx`) across a batch of cost
    matrices and aggregates the results. It preserves batch order and supports
    multi-threaded execution.
    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices (float32/float64).
    extend_cost : bool, default False
        If True, rectangular matrices are handled via internal zero-padding.
        If False, instances must be square.
    cost_limit : float, default np.inf
        A per-instance threshold to prune/limit assignments, forwarded to the
        underlying `lapjvx` implementation.
    return_cost : bool, default True
        If True, returns per-instance totals first.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`.
        Actual workers are capped to the batch size.
    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
        rows_list : List[np.ndarray[int64]] of length B
        cols_list : List[np.ndarray[int64]] of length B
    Else:
        rows_list, cols_list
    Raises
    ------
    ValueError
        - If `costs` is not a 3D array.
        - If any instance is rectangular while `extend_cost=False`.
    Notes
    -----
    - An empty batch (B == 0) returns empty outputs instead of raising.
    - See the single-instance `lapjvx` for detailed behavior around
      `extend_cost` and `cost_limit`.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    # Empty batch: return early. Previously B == 0 reached the threaded branch
    # and ThreadPoolExecutor(max_workers=0) raised ValueError.
    if B == 0:
        if return_cost:
            return np.empty((0,), dtype=np.float64), [], []
        return [], []
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve instance bi; returns (bi, total-or-None, rows, cols).
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _lapjvx_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, rows, cols
        rows, cols = _lapjvx_single(
            a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
        )
        return bi, None, rows, cols

    if threads == 1 or B == 1:
        # Serial path avoids pool overhead for trivial workloads.
        for bi in range(B):
            i, t, r, c = work(bi)
            if return_cost:
                totals[i] = float(t)  # type: ignore
            rows_list[i] = np.asarray(r, dtype=np.int64)
            cols_list[i] = np.asarray(c, dtype=np.int64)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, r, c = fut.result()
                if return_cost:
                    totals[i] = float(t)  # type: ignore
                rows_list[i] = np.asarray(r, dtype=np.int64)
                cols_list[i] = np.asarray(c, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    return rows_list, cols_list
def lapjvxa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """
    Batched lapjvxa solver, returning (K_b, 2) arrays per instance.
    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices.
    extend_cost : bool, default False
        If True, rectangular matrices are solved by internal zero-padding.
    cost_limit : float, default np.inf
        Forwarded to `lapjvxa` to limit/prune assignments.
    return_cost : bool, default True
        If True, includes per-instance totals as the first returned array.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`.
        Actual workers are capped to the batch size.
    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
        pairs_list : List[np.ndarray[int64] with shape (K_b, 2)]
    Else:
        pairs_list
    Raises
    ------
    ValueError
        If `costs` is not a 3D array, or if any instance is rectangular while
        `extend_cost=False`.
    Notes
    -----
    - An empty batch (B == 0) returns empty outputs instead of raising.
    - See `lapjvxa` for single-instance behavior and semantics of `cost_limit`.
    - Results are returned in the original batch order.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    # Empty batch: return early. Previously B == 0 reached the threaded branch
    # and ThreadPoolExecutor(max_workers=0) raised ValueError.
    if B == 0:
        return (np.empty((0,), dtype=np.float64), []) if return_cost else []
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve instance bi; returns (bi, total-or-None, pairs).
        a2d = A[bi]
        if return_cost:
            total, pairs = _lapjvxa_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, pairs
        pairs = _lapjvxa_single(
            a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
        )
        return bi, None, pairs

    if threads == 1 or B == 1:
        # Serial path avoids pool overhead for trivial workloads.
        for bi in range(B):
            i, t, P = work(bi)
            if return_cost:
                totals[i] = float(t)  # type: ignore
            pairs_list[i] = np.asarray(P, dtype=np.int64)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, P = fut.result()
                if return_cost:
                    totals[i] = float(t)  # type: ignore
                pairs_list[i] = np.asarray(P, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
# Copyright (c) 2025 Ratha SIV | MIT License
import numpy as np
from typing import Tuple, Union
from ._lapjvx import lapjvx as _lapjvx # type: ignore
from ._lapjvx import lapjvxa as _lapjvxa # type: ignore
def lapjvx(
    cost: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Jonker-Volgenant LAP solver with scipy-style index-array output.

    The result mirrors scipy.optimize.linear_sum_assignment: a pair of
    (row_indices, col_indices) arrays, optionally preceded by the total cost.

    Parameters
    ----------
    cost : np.ndarray, shape (N, M)
        2D cost matrix; any float dtype. A single contiguous float64 working
        buffer is built internally when required.
    extend_cost : bool, default False
        Allow rectangular input via zero-padding to a square matrix.
    cost_limit : float, default np.inf
        A finite value augments to size (N+M) using cost_limit/2 sentinel
        edges and a zero bottom-right block, modeling a per-edge reject cost
        (rectangular input becomes legal regardless of extend_cost).
    return_cost : bool, default True
        Prepend the total assignment cost, computed on the ORIGINAL input.

    Returns
    -------
    (total_cost, row_indices, col_indices) when return_cost is True,
    otherwise (row_indices, col_indices). row_indices has shape (K,) dtype
    int64; col_indices is typically int32 (platform/NumPy dependent).

    Notes
    -----
    - Orientation is normalized internally (kernel sees rows <= cols) and
      indices are mapped back to the ORIGINAL orientation on return.
    - Augmentation policy: finite cost_limit -> augment to (N+M);
      else rectangular input or extend_cost=True -> zero-pad to max(N, M);
      else the given square matrix is solved directly.
    """
    return _lapjvx(cost, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=return_cost)
def lapjvxa(
    cost: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray],
    np.ndarray,
]:
    """Variant of lapjvx that packs the assignment into one (K, 2) array.

    Each output row holds a (row_index, col_index) pair in the ORIGINAL
    orientation of the input matrix.

    Parameters
    ----------
    cost : np.ndarray, shape (N, M)
        2D cost matrix.
    extend_cost : bool, default False
        Allow rectangular input via zero-padding to a square matrix.
    cost_limit : float, default np.inf
        A finite value augments to (N+M) to model a per-edge reject cost
        (see lapjvx).
    return_cost : bool, default True
        Prepend the total assignment cost.

    Returns
    -------
    (total_cost, assignments) when return_cost is True, otherwise just
    assignments — an np.ndarray of shape (K, 2), dtype int32.

    Notes
    -----
    - Convenience wrapper over lapjvx that fuses (rows, cols) into (K, 2).
    - The total cost is computed on the ORIGINAL input (never on the
      augmented or padded matrix).
    """
    return _lapjvxa(cost, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=return_cost)
# Copyright (c) 2012-2025 Tomas Kazmar | BSD-2-Clause License
import numpy as np
import numpy.typing as npt
from bisect import bisect_left
from typing import Tuple, Union
# import logging
from ._lapjv import _lapmod, FP_DYNAMIC_ as FP_DYNAMIC, LARGE_ as LARGE # type: ignore
def _pycrrt(n, cc, ii, kk, free_rows, x, y, v):
    """Column reduction + reduction transfer for sparse LAPMOD (pure Python).

    The matrix is stored CSR-style: row i's entries are cc[ii[i]:ii[i+1]]
    with column indices kk[ii[i]:ii[i+1]]. Both row and column counts are n.

    Modified in place:
      x          row -> assigned column (-1 if free)
      y          column -> assigned row (-1 if free)
      v          column dual prices (initialized to per-column minima)
      free_rows  filled with the indices of rows left unassigned

    Returns the number of free rows written into ``free_rows``.
    """
    # log = logging.getLogger('do_column_reduction_and_reduction_transfer')
    x[:] = -1
    y[:] = -1
    v[:] = LARGE
    # Column reduction: for every column keep the smallest cost seen and the
    # row it came from. Later rows overwrite on strict improvement only.
    for i in range(n):
        ks = slice(ii[i], ii[i+1])
        js = kk[ks]
        ccs = cc[ks]
        mask = ccs < v[js]
        js = js[mask]
        v[js] = ccs[mask]
        y[js] = i
        # log.debug('v = %s', v)
    # for j in range(cost.shape[1]):
    unique = np.empty((n,), dtype=bool)
    unique[:] = True
    # Walk columns backwards so each row keeps the LOWEST-indexed column that
    # claimed it; other columns claiming the same row are released.
    for j in range(n-1, -1, -1):
        i = y[j]
        # If row is not taken yet, initialize it with the minimum stored in y.
        if x[i] < 0:
            x[i] = j
        else:
            unique[i] = False
            y[j] = -1
        # log.debug('bw %s %s %s %s', i, j, x, y)
    # log.debug('unique %s', unique)
    n_free_rows = 0
    for i in range(n):
        # Store unassigned row i.
        if x[i] < 0:
            free_rows[n_free_rows] = i
            n_free_rows += 1
        elif unique[i] and ii[i+1] - ii[i] > 1:
            # >1 check prevents choking on rows with a single entry
            # Transfer from an assigned row.
            j = x[i]
            # Find the current 2nd minimum of the reduced column costs:
            # (cost[i,j] - v[j]) for some j.
            ks = slice(ii[i], ii[i+1])
            js = kk[ks]
            minv = np.min(cc[ks][js != j] - v[js][js != j])
            # log.debug("v[%d] = %f - %f", j, v[j], minv)
            v[j] -= minv
    # log.debug('free: %s', free_rows[:n_free_rows])
    # log.debug('%s %s', x, v)
    return n_free_rows
def find_minima(indices, values):
    """Return (j1, v1, j2, v2): the columns and values of the two smallest
    entries of ``values`` (j1/v1 is the minimum, j2/v2 the runner-up).

    For an empty input j1 is 0; a missing entry is reported with value
    LARGE, and j2 stays -1 when there is no second entry.
    """
    if len(indices) > 0:
        best_j, best_v = indices[0], values[0]
    else:
        best_j, best_v = 0, LARGE
    second_j, second_v = -1, LARGE
    for j, h in zip(indices[1:], values[1:]):
        if h >= second_v:
            continue
        if h < best_v:
            # New overall minimum: previous best becomes the runner-up.
            second_j, second_v = best_j, best_v
            best_j, best_v = j, h
        else:
            # Beats only the runner-up (ties with best land here too).
            second_j, second_v = j, h
    return best_j, best_v, second_j, second_v
def _pyarr(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augmenting row reduction (pure-Python lapmod phase 2).

    For each queued free row, find the smallest (j1/v1) and second-smallest
    (j2/v2) reduced costs in that row via ``find_minima``.  Either lower
    the price of the best column and take it, or fall back to the
    second-best column; a row displaced in the process is re-queued.

    Mutates x, y, v and free_rows in place; returns the number of rows
    still unassigned afterwards (stored in free_rows[:result]).
    """
    # log = logging.getLogger('do_augmenting_row_reduction')
    # log.debug('%s %s %s', x, y, v)
    current = 0
    # log.debug('free: %s', free_rows[:n_free_rows])
    new_free_rows = 0
    while current < n_free_rows:
        free_i = free_rows[current]
        # log.debug('current = %d', current)
        current += 1
        ks = slice(ii[free_i], ii[free_i+1])
        js = kk[ks]
        # j2 == -1 signals the row has a single entry (no runner-up).
        j1, v1, j2, v2 = find_minima(js, cc[ks] - v[js])
        i0 = y[j1]
        # Try to lower v[j1] by the gap to the second minimum; if the
        # price actually drops, j1 remains the unique best choice.
        v1_new = v[j1] - (v2 - v1)
        v1_lowers = v1_new < v[j1]
        # log.debug(
        #     '%d %d 1=%s,%f 2=%s,%f %f %s',
        #     free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers)
        if v1_lowers:
            v[j1] = v1_new
        elif i0 >= 0 and j2 != -1:  # i0 is assigned, try j2
            j1 = j2
            i0 = y[j2]
        # Assign the chosen column to the current free row.
        x[free_i] = j1
        y[j1] = free_i
        if i0 >= 0:
            # Row i0 just lost its column.
            if v1_lowers:
                # Re-process it immediately at the same queue position.
                current -= 1
                # log.debug('continue augmenting path from current %s %s %s')
                free_rows[current] = i0
            else:
                # log.debug('stop the augmenting path and keep for later')
                free_rows[new_free_rows] = i0
                new_free_rows += 1
    # log.debug('free: %s', free_rows[:new_free_rows])
    return new_free_rows
def binary_search(data, key):
    """Locate ``key`` in the sorted sequence ``data``.

    Returns the index of ``key`` when present, otherwise None.
    """
    pos = bisect_left(data, key)
    if pos == len(data) or data[pos] != key:
        return None
    return pos
def _find(hi, d, cols, y):
lo, hi = hi, hi + 1
minv = d[cols[lo]]
# XXX: anytime this happens to be NaN, i'm screwed...
# assert not np.isnan(minv)
for k in range(hi, len(cols)):
j = cols[k]
if d[j] <= minv:
# New minimum found, trash the new SCAN columns found so far.
if d[j] < minv:
hi = lo
minv = d[j]
cols[k], cols[hi] = cols[hi], j
hi += 1
return minv, hi, cols
def _scan(n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v):
    """Scan step of the shortest augmenting path search (sparse variant).

    Processes the SCAN columns cols[lo:hi]: for each such column j, takes
    its assigned row i = y[j] and relaxes the tentative distances ``d`` of
    the TODO columns cols[hi:n] reachable from row i.  A TODO column whose
    new distance equals the current minimum ``minv`` is promoted into the
    SCAN region (swapped to position hi) — unless it is unassigned, in
    which case the search can stop at it immediately.

    Returns (final_j, lo, hi, d, cols, pred): final_j is an unassigned
    column reached at distance minv (with lo/hi then None), or -1 when the
    SCAN region is exhausted, in which case lo/hi are the updated bounds.
    Mutates d, cols and pred in place.
    """
    # log = logging.getLogger('_scan')
    # Scan all TODO columns.
    while lo != hi:
        j = cols[lo]
        lo += 1
        i = y[j]
        # log.debug('?%d kk[%d:%d]=%s', j, ii[i], ii[i+1], kk[ii[i]:ii[i+1]])
        # Row i may not have an entry in column j at all (sparse matrix).
        kj = binary_search(kk[ii[i]:ii[i+1]], j)
        if kj is None:
            continue
        kj = ii[i] + kj
        # h is the offset so that distances via row i compose with minv.
        h = cc[kj] - v[j] - minv
        # log.debug('i=%d j=%d kj=%s h=%f', i, j, kj, h)
        for k in range(hi, n):
            j = cols[k]
            kj = binary_search(kk[ii[i]:ii[i+1]], j)
            if kj is None:
                continue
            kj = ii[i] + kj
            cred_ij = cc[kj] - v[j] - h
            if cred_ij < d[j]:
                # Shorter path to column j found, going through row i.
                d[j] = cred_ij
                pred[j] = i
                if cred_ij == minv:
                    if y[j] < 0:
                        # Unassigned column at minimal distance: done.
                        return j, None, None, d, cols, pred
                    # Promote j into the SCAN region.
                    cols[k] = cols[hi]
                    cols[hi] = j
                    hi += 1
    return -1, lo, hi, d, cols, pred
def find_path(n, cc, ii, kk, start_i, y, v):
    """Shortest augmenting path from free row ``start_i`` (Dijkstra-like).

    ``cols`` is progressively permuted into three regions:
    READY (cols[:n_ready], minimal distance settled), SCAN (cols[lo:hi],
    being relaxed by ``_scan``), and TODO (cols[hi:]).  ``d`` holds the
    tentative distance to each column and ``pred`` the predecessor row.

    Returns (final_j, pred): the unassigned column where the path ends and
    the predecessor array used by the caller to backtrack.  Also updates
    the column prices ``v`` of the READY columns in place.
    """
    # log = logging.getLogger('find_path')
    cols = np.arange(n, dtype=int)
    pred = np.empty((n,), dtype=int)
    pred[:] = start_i
    d = np.empty((n,), dtype=float)
    d[:] = LARGE
    # Seed distances with the reduced costs of the start row's entries.
    ks = slice(ii[start_i], ii[start_i+1])
    js = kk[ks]
    d[js] = cc[ks] - v[js]
    # log.debug('d = %s', d)
    minv = LARGE
    lo, hi = 0, 0
    n_ready = 0
    final_j = -1
    while final_j == -1:
        # No SCAN columns, find new ones.
        if lo == hi:
            # log.debug('%d..%d -> find', lo, hi)
            # log.debug('cols = %s', cols)
            n_ready = lo
            minv, hi, cols = _find(hi, d, cols, y)
            # log.debug('%d..%d -> check', lo, hi)
            # log.debug('cols = %s', cols)
            # log.debug('y = %s', y)
            for h in range(lo, hi):
                # If any of the new SCAN columns is unassigned, use it.
                if y[cols[h]] < 0:
                    final_j = cols[h]
        if final_j == -1:
            # log.debug('%d..%d -> scan', lo, hi)
            final_j, lo, hi, d, cols, pred = _scan(
                n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v)
            # log.debug('d = %s', d)
            # log.debug('cols = %s', cols)
            # log.debug('pred = %s', pred)
    # Update prices for READY columns.
    for k in range(n_ready):  # type: ignore
        j0 = cols[k]
        v[j0] += d[j0] - minv
    assert final_j >= 0
    assert final_j < n
    return final_j, pred
def _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augmentation phase: for every remaining free row, find a shortest
    augmenting path with ``find_path`` and flip the assignments along it.

    Mutates x, y (assignment vectors) and v (column prices) in place.
    """
    for free_i in free_rows[:n_free_rows]:
        final_j, pred = find_path(n, cc, ii, kk, free_i, y, v)
        # Backtrack along the predecessor chain from final_j to free_i,
        # re-assigning each (row, column) pair on the way.
        i = -1
        while i != free_i:
            i = pred[final_j]
            assert 0 <= i < n
            y[final_j] = i
            # Row i takes final_j; its previous column continues the walk.
            final_j, x[i] = x[i], final_j
def check_cost(n, cc, ii, kk):
    """Validate the sparse cost representation.

    Raises ValueError when the matrix has no rows, no columns, contains a
    negative cost, or contains a cost at or above the LARGE sentinel.
    """
    if n == 0:
        raise ValueError('Cost matrix has zero rows.')
    if len(kk) == 0:
        raise ValueError('Cost matrix has zero columns.')
    if cc.min() < 0:
        raise ValueError('Cost matrix values must be non-negative.')
    if cc.max() >= LARGE:
        raise ValueError(
            'Cost matrix values must be less than %s' % LARGE)
def get_cost(n, cc, ii, kk, x0):
    """Total cost of assignment ``x0`` (column index per row) under the
    sparse cost matrix; np.inf if any assigned edge is not in the matrix.
    """
    total = 0
    for i, j in enumerate(x0):
        kj = binary_search(kk[ii[i]:ii[i+1]], j)
        if kj is None:
            # Row i was assigned a column it has no entry for.
            return np.inf
        total += cc[ii[i] + kj]
    return total
# def lapmod(n, cc, ii, kk, fast=True, return_cost=True, fp_version=FP_DYNAMIC):
def lapmod(
    n: int,
    cc: npt.NDArray[np.floating],
    ii: npt.NDArray[np.integer],
    kk: npt.NDArray[np.integer],
    fast: bool = True,
    return_cost: bool = True,
    fp_version: int = FP_DYNAMIC,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Solve sparse linear assignment problem using Jonker-Volgenant algorithm.

    n: number of rows of the assignment cost matrix
    cc: 1D array of all finite elements of the assignment cost matrix,
        row by row
    ii: 1D array of indices of the row starts in cc. The following must hold:
        ii[0] = 0 and ii[n] = len(cc).
    kk: 1D array of the column indices so that:
        cost[i, kk[ii[i] + k]] == cc[ii[i] + k].
        Indices within one row must be sorted.
    fast: when True (default) use the compiled ``_lapmod`` solver;
        otherwise run the pure-Python reference implementation in this
        module (column reduction, augmenting row reduction, augmentation)
    return_cost: whether or not to return the assignment cost
    fp_version: free-row processing variant flag, forwarded to the
        compiled solver (only used when fast is True)

    Returns (opt, x, y) where:
      opt: cost of the assignment
      x: vector of columns assigned to rows
      y: vector of rows assigned to columns
    or (x, y) if return_cost is not True.
    """
    # log = logging.getLogger('lapmod')
    check_cost(n, cc, ii, kk)
    if fast is True:
        # log.debug('[----CR & RT & ARR & augmentation ----]')
        x, y = _lapmod(n, cc, ii, kk, fp_version=fp_version)
    else:
        # Pure-Python path: normalize dtypes/layout and run the three
        # phases explicitly, returning early whenever a phase finishes
        # the assignment.
        cc = np.ascontiguousarray(cc, dtype=np.float64)
        ii = np.ascontiguousarray(ii, dtype=np.int32)
        kk = np.ascontiguousarray(kk, dtype=np.int32)
        x = np.empty((n,), dtype=np.int32)
        y = np.empty((n,), dtype=np.int32)
        v = np.empty((n,), dtype=np.float64)
        free_rows = np.empty((n,), dtype=np.int32)
        # log.debug('[----Column reduction & reduction transfer----]')
        n_free_rows = _pycrrt(n, cc, ii, kk, free_rows, x, y, v)
        # log.debug(
        #     'free, x, y, v: %s %s %s %s', free_rows[:n_free_rows], x, y, v)
        if n_free_rows == 0:
            # log.info('Reduction solved it.')
            if return_cost is True:
                return get_cost(n, cc, ii, kk, x), x, y
            else:
                return x, y
        for it in range(2):
            # log.debug('[---Augmenting row reduction (iteration: %d)---]', it)
            n_free_rows = _pyarr(
                n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
            # log.debug(
            #     'free, x, y, v: %s %s %s %s', free_rows[:n_free_rows], x, y, v)
            if n_free_rows == 0:
                # log.info('Augmenting row reduction solved it.')
                if return_cost is True:
                    return get_cost(n, cc, ii, kk, x), x, y
                else:
                    return x, y
        # log.info('[----Augmentation----]')
        _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
    # log.debug('x, y, v: %s %s %s', x, y, v)
    if return_cost is True:
        return get_cost(n, cc, ii, kk, x), x, y
    else:
        return x, y
+477
-452
[![GitHub release](https://img.shields.io/github/release/rathaROG/lapx.svg)](https://github.com/rathaROG/lapx/releases)
[![PyPI version](https://badge.fury.io/py/lapx.svg)](https://badge.fury.io/py/lapx)
[![Benchmark](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml)
[![PyPI version](https://badge.fury.io/py/lapx.svg?v=0.8.1)](https://badge.fury.io/py/lapx)
[![Benchmark (Single)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml)
[![Benchmark (Batch)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml)

@@ -9,4 +9,4 @@ [![Benchmark (Object Tracking)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml)

`lapx` focuses more on real-world applications, and the [benchmark_batch.py](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_batch.py)
and [benchmark.py](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark.py) are **not**
`lapx` focuses more on real-world applications, and the [benchmark_batch.py](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_batch.py)
and [benchmark_single.py](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_single.py) are **not**
intended for scientific research or competitive evaluation. Instead, it provides a quick and accessible way for

@@ -24,53 +24,68 @@ you to run benchmark tests on your own machine. Below, you will also find a collection of interesting results

git clone https://github.com/rathaROG/lapx.git
cd lapx/.github/test
cd lapx/benchmarks
python benchmark_batch.py
python benchmark.py
python benchmark_single.py
```
Note: [SciPy](https://pypi.org/project/scipy/) is used as the baseline in the benchmark.
Note: [SciPy](https://pypi.org/project/scipy/) is used as the baseline in the single benchmark `benchmark_single.py`.
📊 Some benchmark results using `lapx` [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) (2025/10/27):
📊 Some benchmark results using `lapx` [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) (2025/10/27):
<details><summary>🗂️ Batch on my Windows 11 i9-13900KS (8 p-core + 8 e-core) + python 3.9.13:</summary>
<details><summary>🗂️ Batch on my local Windows 11 i9-13900KS (8 p-core + 8 e-core) + python 3.11.9:</summary>
```
# 50 x (3000x3000) | n_threads = 24
Microsoft Windows [Version 10.0.26200.7019]
(c) Microsoft Corporation. All rights reserved.
CPU lapx-batch-jvx : cost=82.08230873, time=1.40321970s
CPU lapx-batch-jvs : cost=82.08230873, time=0.80294538s
CPU lapx-batch-jvxa : cost=82.08230873, time=1.40610409s
CPU lapx-batch-jvsa : cost=82.08230873, time=0.81906796s
CPU lapx-batch-jvsa64 : cost=82.08230873, time=1.42109323s
CPU lapx-loop-jvx : cost=82.08230873, time=11.01966000s
CPU lapx-loop-jvs : cost=82.08230873, time=8.18710470s
D:\DEV\lapx_all\tmp\lapx\benchmarks>python benchmark_batch.py
# 100 x (2000x2000) | n_threads = 24
# 10 x (4000x4000) | n_threads = 24
CPU lapx-batch-jvx : cost=164.54469568, time=0.81932855s
CPU lapx-batch-jvs : cost=164.54469568, time=0.58506370s
CPU lapx-batch-jvxa : cost=164.54469568, time=0.83581567s
CPU lapx-batch-jvsa : cost=164.54469568, time=0.59467125s
CPU lapx-batch-jvsa64 : cost=164.54469568, time=0.88178015s
CPU lapx-loop-jvx : cost=164.54469568, time=7.68291450s
CPU lapx-loop-jvs : cost=164.54469568, time=6.44884777s
CPU lapx-batch-jvx : cost=16.48859572, time=0.67588449s
CPU lapx-batch-jvs : cost=16.48859572, time=0.46411657s
CPU lapx-batch-jvxa : cost=16.48859572, time=0.71385884s
CPU lapx-batch-jvsa : cost=16.48859572, time=0.45670390s
CPU lapx-batch-jvsa64 : cost=16.48859572, time=0.70847058s
CPU lapx-loop-jvx : cost=16.48859572, time=3.95986462s
CPU lapx-loop-jvs : cost=16.48859572, time=2.66866994s
# 500 x (1000x2000) | n_threads = 24
# 20 x (3000x2000) | n_threads = 24
CPU lapx-batch-jvx : cost=291.25078928, time=0.91706204s
CPU lapx-batch-jvs : cost=291.25078928, time=0.79455686s
CPU lapx-batch-jvxa : cost=291.25078928, time=0.93096972s
CPU lapx-batch-jvsa : cost=291.25078928, time=0.79109597s
CPU lapx-batch-jvsa64 : cost=291.25078928, time=1.23274732s
CPU lapx-loop-jvx : cost=291.25078928, time=5.47222424s
CPU lapx-loop-jvs : cost=291.25078928, time=5.73832059s
CPU lapx-batch-jvx : cost=16.65042067, time=0.18923616s
CPU lapx-batch-jvs : cost=16.65042067, time=0.17624354s
CPU lapx-batch-jvxa : cost=16.65042067, time=0.18447852s
CPU lapx-batch-jvsa : cost=16.65042067, time=0.18925667s
CPU lapx-batch-jvsa64 : cost=16.65042067, time=0.18949389s
CPU lapx-loop-jvx : cost=16.65042067, time=0.85662770s
CPU lapx-loop-jvs : cost=16.65042067, time=1.05569839s
# 1000 x (1000x1000) | n_threads = 24
# 50 x (2000x2000) | n_threads = 24
CPU lapx-batch-jvx : cost=1641.72891905, time=1.18257976s
CPU lapx-batch-jvs : cost=1641.72891905, time=1.13616300s
CPU lapx-batch-jvxa : cost=1641.72891905, time=1.16668177s
CPU lapx-batch-jvsa : cost=1641.72891905, time=1.11944461s
CPU lapx-batch-jvsa64 : cost=1641.72891905, time=1.23001194s
CPU lapx-loop-jvx : cost=1641.72891905, time=13.90460992s
CPU lapx-loop-jvs : cost=1641.72891905, time=14.32015085s
CPU lapx-batch-jvx : cost=82.12386385, time=0.56725645s
CPU lapx-batch-jvs : cost=82.12386385, time=0.37664533s
CPU lapx-batch-jvxa : cost=82.12386385, time=0.57265162s
CPU lapx-batch-jvsa : cost=82.12386385, time=0.37772393s
CPU lapx-batch-jvsa64 : cost=82.12386385, time=0.61493921s
CPU lapx-loop-jvx : cost=82.12386385, time=4.46092606s
CPU lapx-loop-jvs : cost=82.12386385, time=3.49988031s
# 100 x (1000x2000) | n_threads = 24
CPU lapx-batch-jvx : cost=58.19636934, time=0.18971944s
CPU lapx-batch-jvs : cost=58.19636934, time=0.16700149s
CPU lapx-batch-jvxa : cost=58.19636934, time=0.18943620s
CPU lapx-batch-jvsa : cost=58.19636934, time=0.16706610s
CPU lapx-batch-jvsa64 : cost=58.19636934, time=0.25204611s
CPU lapx-loop-jvx : cost=58.19636934, time=1.02838278s
CPU lapx-loop-jvs : cost=58.19636934, time=1.21967244s
# 500 x (1000x1000) | n_threads = 24
CPU lapx-batch-jvx : cost=821.97407482, time=0.59273267s
CPU lapx-batch-jvs : cost=821.97407482, time=0.58274126s
CPU lapx-batch-jvxa : cost=821.97407482, time=0.58346224s
CPU lapx-batch-jvsa : cost=821.97407482, time=0.58098578s
CPU lapx-batch-jvsa64 : cost=821.97407482, time=0.61520362s
CPU lapx-loop-jvx : cost=821.97407482, time=6.64442897s
CPU lapx-loop-jvs : cost=821.97407482, time=7.03527546s
```

@@ -82,3 +97,3 @@

https://github.com/rathaROG/lapx/actions/runs/18851354956/job/53788494991
https://github.com/rathaROG/lapx/actions/runs/18961613065/job/54149890164

@@ -89,17 +104,17 @@ ```

-----------------------------------------
* lapjvc : ✅ Passed 🐌 2.13 x slower
* lapjv : ✅ Passed 🐌 2.85 x slower
* lapjvx : ✅ Passed 🐌 1.17 x slower
* lapjvxa : ✅ Passed 🏆 1.6 x faster
* lapjvs : ✅ Passed 🐌 2.33 x slower
* lapjvsa : ✅ Passed 🐌 2.1 x slower
* lapjvc : ✅ Passed 🐌 1.64 x slower
* lapjv : ✅ Passed 🐌 5.53 x slower
* lapjvx : ✅ Passed 🐌 2.68 x slower
* lapjvxa : ✅ Passed 🐌 1.81 x slower
* lapjvs : ✅ Passed 🐌 3.56 x slower
* lapjvsa : ✅ Passed 🐌 3.36 x slower
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.00001882s
2. scipy ⭐ : 0.00003012s
3. lapjvx : 0.00003522s
4. lapjvsa : 0.00006335s
5. lapjvc : 0.00006415s
6. lapjvs : 0.00007016s
7. lapjv : 0.00008579s
1. scipy ⭐ : 0.00001024s
2. lapjvc : 0.00001677s
3. lapjvxa : 0.00001853s
4. lapjvx : 0.00002740s
5. lapjvsa : 0.00003439s
6. lapjvs : 0.00003648s
7. lapjv : 0.00005660s
-------------------------------

@@ -110,17 +125,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.5 x slower
* lapjv : ✅ Passed 🐌 1.9 x slower
* lapjvx : ✅ Passed 🐌 1.3 x slower
* lapjvxa : ✅ Passed 🏆 1.15 x faster
* lapjvs : ✅ Passed 🐌 1.92 x slower
* lapjvsa : ✅ Passed 🏆 1.79 x faster
* lapjvc : ✅ Passed 🐌 2.03 x slower
* lapjv : ✅ Passed 🐌 5.72 x slower
* lapjvx : ✅ Passed 🐌 2.28 x slower
* lapjvxa : ✅ Passed 🐌 1.75 x slower
* lapjvs : ✅ Passed 🐌 2.7 x slower
* lapjvsa : ✅ Passed 🐌 1.04 x slower
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvsa : 0.00000646s
2. lapjvxa : 0.00001002s
3. scipy ⭐ : 0.00001154s
4. lapjvx : 0.00001498s
5. lapjvc : 0.00001732s
6. lapjv : 0.00002196s
7. lapjvs : 0.00002215s
1. scipy ⭐ : 0.00000664s
2. lapjvsa : 0.00000690s
3. lapjvxa : 0.00001165s
4. lapjvc : 0.00001346s
5. lapjvx : 0.00001517s
6. lapjvs : 0.00001796s
7. lapjv : 0.00003800s
-------------------------------

@@ -131,17 +146,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.82 x slower
* lapjv : ✅ Passed 🐌 3.92 x slower
* lapjvx : ✅ Passed 🐌 2.34 x slower
* lapjvxa : ✅ Passed 🐌 1.69 x slower
* lapjvs : ✅ Passed 🐌 3.3 x slower
* lapjvsa : ✅ Passed 🐌 5.28 x slower
* lapjvc : ✅ Passed 🐌 2.3 x slower
* lapjv : ✅ Passed 🐌 9.53 x slower
* lapjvx : ✅ Passed 🐌 3.62 x slower
* lapjvxa : ✅ Passed 🐌 3.04 x slower
* lapjvs : ✅ Passed 🐌 4.63 x slower
* lapjvsa : ✅ Passed 🐌 5.32 x slower
----- 🎉 SPEED RANKING 🎉 -----
1. scipy ⭐ : 0.00000862s
2. lapjvxa : 0.00001455s
3. lapjvc : 0.00001566s
4. lapjvx : 0.00002017s
5. lapjvs : 0.00002845s
6. lapjv : 0.00003373s
7. lapjvsa : 0.00004552s
1. scipy ⭐ : 0.00000537s
2. lapjvc : 0.00001233s
3. lapjvxa : 0.00001631s
4. lapjvx : 0.00001947s
5. lapjvs : 0.00002489s
6. lapjvsa : 0.00002857s
7. lapjv : 0.00005116s
-------------------------------

@@ -152,17 +167,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.87 x slower
* lapjv : ✅ Passed 🏆 1.19 x faster
* lapjvx : ✅ Passed 🏆 1.62 x faster
* lapjvxa : ✅ Passed 🏆 2.35 x faster
* lapjvs : ✅ Passed 🏆 1.33 x faster
* lapjvsa : ✅ Passed 🏆 1.32 x faster
* lapjvc : ✅ Passed 🐌 1.94 x slower
* lapjv : ✅ Passed 🐌 1.34 x slower
* lapjvx : ✅ Passed 🏆 1.15 x faster
* lapjvxa : ✅ Passed 🏆 1.41 x faster
* lapjvs : ✅ Passed 🐌 1.98 x slower
* lapjvsa : ✅ Passed 🏆 1.13 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.00003084s
2. lapjvx : 0.00004490s
3. lapjvs : 0.00005469s
4. lapjvsa : 0.00005475s
5. lapjv : 0.00006076s
6. scipy ⭐ : 0.00007253s
7. lapjvc : 0.00013553s
1. lapjvxa : 0.00003351s
2. lapjvx : 0.00004110s
3. lapjvsa : 0.00004164s
4. scipy ⭐ : 0.00004721s
5. lapjv : 0.00006310s
6. lapjvc : 0.00009150s
7. lapjvs : 0.00009357s
-------------------------------

@@ -173,17 +188,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🏆 1.19 x faster
* lapjv : ✅ Passed 🏆 2.1 x faster
* lapjvx : ✅ Passed 🏆 2.77 x faster
* lapjvxa : ✅ Passed 🏆 4.34 x faster
* lapjvs : ✅ Passed 🏆 2.3 x faster
* lapjvsa : ✅ Passed 🏆 5.58 x faster
* lapjvc : ✅ Passed 🏆 1.43 x faster
* lapjv : ✅ Passed 🏆 1.44 x faster
* lapjvx : ✅ Passed 🏆 2.25 x faster
* lapjvxa : ✅ Passed 🏆 2.94 x faster
* lapjvs : ✅ Passed 🏆 2.27 x faster
* lapjvsa : ✅ Passed 🏆 3.99 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvsa : 0.00001290s
2. lapjvxa : 0.00001658s
3. lapjvx : 0.00002601s
4. lapjvs : 0.00003129s
5. lapjv : 0.00003426s
6. lapjvc : 0.00006070s
7. scipy ⭐ : 0.00007195s
1. lapjvsa : 0.00002166s
2. lapjvxa : 0.00002932s
3. lapjvs : 0.00003803s
4. lapjvx : 0.00003831s
5. lapjv : 0.00005988s
6. lapjvc : 0.00006028s
7. scipy ⭐ : 0.00008634s
-------------------------------

@@ -194,17 +209,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.59 x slower
* lapjv : ✅ Passed 🏆 1.14 x faster
* lapjvx : ✅ Passed 🏆 1.65 x faster
* lapjvxa : ✅ Passed 🏆 2.33 x faster
* lapjvs : ✅ Passed 🏆 1.58 x faster
* lapjvsa : ✅ Passed 🏆 1.37 x faster
* lapjvc : ✅ Passed 🐌 1.22 x slower
* lapjv : ✅ Passed 🏆 1.07 x faster
* lapjvx : ✅ Passed 🏆 1.54 x faster
* lapjvxa : ✅ Passed 🏆 1.88 x faster
* lapjvs : ✅ Passed 🏆 1.63 x faster
* lapjvsa : ✅ Passed 🏆 1.47 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.00003199s
2. lapjvx : 0.00004507s
3. lapjvs : 0.00004723s
4. lapjvsa : 0.00005446s
5. lapjv : 0.00006505s
6. scipy ⭐ : 0.00007443s
7. lapjvc : 0.00011813s
1. lapjvxa : 0.00004026s
2. lapjvs : 0.00004654s
3. lapjvx : 0.00004924s
4. lapjvsa : 0.00005152s
5. lapjv : 0.00007051s
6. scipy ⭐ : 0.00007566s
7. lapjvc : 0.00009201s
-------------------------------

@@ -215,17 +230,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 4.19 x slower
* lapjv : ✅ Passed 🏆 2.46 x faster
* lapjvx : ✅ Passed 🏆 2.84 x faster
* lapjvxa : ✅ Passed 🏆 3.94 x faster
* lapjvs : ✅ Passed 🏆 4.37 x faster
* lapjvsa : ✅ Passed 🏆 4.45 x faster
* lapjvc : ✅ Passed 🐌 4.97 x slower
* lapjv : ✅ Passed 🏆 2.02 x faster
* lapjvx : ✅ Passed 🏆 2.34 x faster
* lapjvxa : ✅ Passed 🏆 3.09 x faster
* lapjvs : ✅ Passed 🏆 3.95 x faster
* lapjvsa : ✅ Passed 🏆 3.89 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvsa : 0.00108461s
2. lapjvs : 0.00110318s
3. lapjvxa : 0.00122390s
4. lapjvx : 0.00169714s
5. lapjv : 0.00195890s
6. scipy ⭐ : 0.00482560s
7. lapjvc : 0.02019986s
1. lapjvs : 0.00116294s
2. lapjvsa : 0.00117967s
3. lapjvxa : 0.00148459s
4. lapjvx : 0.00196006s
5. lapjv : 0.00227485s
6. scipy ⭐ : 0.00458916s
7. lapjvc : 0.02279758s
-------------------------------

@@ -236,17 +251,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.01 x slower
* lapjv : ✅ Passed 🏆 2.0 x faster
* lapjvx : ✅ Passed 🏆 2.06 x faster
* lapjvxa : ✅ Passed 🏆 2.06 x faster
* lapjvs : ✅ Passed 🏆 2.05 x faster
* lapjvsa : ✅ Passed 🏆 2.06 x faster
* lapjvc : ✅ Passed 🏆 1.46 x faster
* lapjv : ✅ Passed 🏆 1.19 x faster
* lapjvx : ✅ Passed 🏆 1.19 x faster
* lapjvxa : ✅ Passed 🏆 1.21 x faster
* lapjvs : ✅ Passed 🏆 1.58 x faster
* lapjvsa : ✅ Passed 🏆 1.6 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvsa : 0.00498536s
2. lapjvxa : 0.00499154s
3. lapjvx : 0.00499524s
4. lapjvs : 0.00501234s
5. lapjv : 0.00512501s
6. scipy ⭐ : 0.01026616s
7. lapjvc : 0.01041151s
1. lapjvsa : 0.00610949s
2. lapjvs : 0.00617076s
3. lapjvc : 0.00669765s
4. lapjvxa : 0.00807378s
5. lapjvx : 0.00817340s
6. lapjv : 0.00822582s
7. scipy ⭐ : 0.00976096s
-------------------------------

@@ -257,17 +272,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 4.17 x slower
* lapjv : ✅ Passed 🏆 3.74 x faster
* lapjvx : ✅ Passed 🏆 4.29 x faster
* lapjvxa : ✅ Passed 🏆 4.37 x faster
* lapjvs : ✅ Passed 🏆 4.33 x faster
* lapjvsa : ✅ Passed 🏆 4.38 x faster
* lapjvc : ✅ Passed 🐌 4.66 x slower
* lapjv : ✅ Passed 🏆 2.07 x faster
* lapjvx : ✅ Passed 🏆 3.44 x faster
* lapjvxa : ✅ Passed 🏆 3.42 x faster
* lapjvs : ✅ Passed 🏆 4.27 x faster
* lapjvsa : ✅ Passed 🏆 4.37 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvsa : 0.00136651s
2. lapjvxa : 0.00136884s
3. lapjvs : 0.00138280s
4. lapjvx : 0.00139509s
5. lapjv : 0.00160113s
6. scipy ⭐ : 0.00598653s
7. lapjvc : 0.02498232s
1. lapjvsa : 0.00128856s
2. lapjvs : 0.00131904s
3. lapjvx : 0.00163978s
4. lapjvxa : 0.00164817s
5. lapjv : 0.00272247s
6. scipy ⭐ : 0.00563737s
7. lapjvc : 0.02625532s
-------------------------------

@@ -278,17 +293,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 222.34 x slower
* lapjv : ✅ Passed 🏆 1.15 x faster
* lapjvx : ✅ Passed 🏆 1.31 x faster
* lapjvxa : ✅ Passed 🏆 1.29 x faster
* lapjvc : ✅ Passed 🐌 257.47 x slower
* lapjv : ✅ Passed 🏆 1.09 x faster
* lapjvx : ✅ Passed 🏆 1.09 x faster
* lapjvxa : ✅ Passed 🏆 1.08 x faster
* lapjvs : ✅ Passed 🐌 1.11 x slower
* lapjvsa : ✅ Passed 🐌 1.11 x slower
* lapjvsa : ✅ Passed 🐌 1.1 x slower
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvx : 0.07696005s
2. lapjvxa : 0.07810211s
3. lapjv : 0.08824028s
4. scipy ⭐ : 0.10107496s
5. lapjvsa : 0.11232857s
6. lapjvs : 0.11252413s
7. lapjvc : 22.47302152s
1. lapjvx : 0.09400308s
2. lapjv : 0.09424586s
3. lapjvxa : 0.09509772s
4. scipy ⭐ : 0.10258777s
5. lapjvsa : 0.11241747s
6. lapjvs : 0.11372150s
7. lapjvc : 26.41295845s
-------------------------------

@@ -299,17 +314,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🏆 1.17 x faster
* lapjv : ✅ Passed 🏆 1.3 x faster
* lapjvx : ✅ Passed 🏆 1.31 x faster
* lapjvxa : ✅ Passed 🏆 1.31 x faster
* lapjvs : ✅ Passed 🏆 2.06 x faster
* lapjvsa : ✅ Passed 🏆 2.06 x faster
* lapjvc : ✅ Passed 🐌 1.02 x slower
* lapjv : ✅ Passed 🐌 1.62 x slower
* lapjvx : ✅ Passed 🐌 1.62 x slower
* lapjvxa : ✅ Passed 🐌 1.62 x slower
* lapjvs : ✅ Passed 🏆 1.76 x faster
* lapjvsa : ✅ Passed 🏆 1.76 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvs : 1.16571718s
2. lapjvsa : 1.16708573s
3. lapjvxa : 1.84065010s
4. lapjvx : 1.84106529s
5. lapjv : 1.84539000s
6. lapjvc : 2.04553916s
7. scipy ⭐ : 2.40261425s
1. lapjvs : 1.34793133s
2. lapjvsa : 1.34966543s
3. scipy ⭐ : 2.37237136s
4. lapjvc : 2.41397720s
5. lapjvxa : 3.84284193s
6. lapjvx : 3.84922083s
7. lapjv : 3.85101395s
-------------------------------

@@ -320,17 +335,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 230.42 x slower
* lapjv : ✅ Passed 🏆 2.24 x faster
* lapjvx : ✅ Passed 🏆 2.54 x faster
* lapjvxa : ✅ Passed 🏆 2.5 x faster
* lapjvs : ✅ Passed 🏆 1.66 x faster
* lapjvsa : ✅ Passed 🏆 1.68 x faster
* lapjvc : ✅ Passed 🐌 273.6 x slower
* lapjv : ✅ Passed 🏆 2.03 x faster
* lapjvx : ✅ Passed 🏆 2.04 x faster
* lapjvxa : ✅ Passed 🏆 2.05 x faster
* lapjvs : ✅ Passed 🏆 1.59 x faster
* lapjvsa : ✅ Passed 🏆 1.63 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvx : 0.16310518s
2. lapjvxa : 0.16545943s
3. lapjv : 0.18474822s
4. lapjvsa : 0.24673601s
5. lapjvs : 0.24954140s
6. scipy ⭐ : 0.41429755s
7. lapjvc : 95.46102137s
1. lapjvxa : 0.19721307s
2. lapjvx : 0.19786040s
3. lapjv : 0.19958704s
4. lapjvsa : 0.24835583s
5. lapjvs : 0.25347052s
6. scipy ⭐ : 0.40418303s
7. lapjvc : 110.58635478s
-------------------------------

@@ -343,3 +358,3 @@ ```

https://github.com/rathaROG/lapx/actions/runs/18851354956/job/53788495229
https://github.com/rathaROG/lapx/actions/runs/18961613065/job/54149890234

@@ -350,17 +365,17 @@ ```

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.65 x slower
* lapjv : ✅ Passed 🐌 4.13 x slower
* lapjvx : ✅ Passed 🐌 1.26 x slower
* lapjvxa : ✅ Passed 🏆 3.06 x faster
* lapjvs : ✅ Passed 🐌 1.52 x slower
* lapjvsa : ✅ Passed 🐌 1.46 x slower
* lapjvc : ✅ Passed 🐌 1.71 x slower
* lapjv : ✅ Passed 🐌 5.14 x slower
* lapjvx : ✅ Passed 🐌 2.32 x slower
* lapjvxa : ✅ Passed 🐌 1.57 x slower
* lapjvs : ✅ Passed 🐌 3.57 x slower
* lapjvsa : ✅ Passed 🐌 3.25 x slower
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.00001037s
2. scipy ⭐ : 0.00003179s
3. lapjvx : 0.00003996s
4. lapjvsa : 0.00004642s
5. lapjvs : 0.00004833s
6. lapjvc : 0.00005250s
7. lapjv : 0.00013146s
1. scipy ⭐ : 0.00000567s
2. lapjvxa : 0.00000888s
3. lapjvc : 0.00000971s
4. lapjvx : 0.00001317s
5. lapjvsa : 0.00001842s
6. lapjvs : 0.00002025s
7. lapjv : 0.00002913s
-------------------------------

@@ -371,17 +386,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.51 x slower
* lapjv : ✅ Passed 🐌 1.65 x slower
* lapjvx : ✅ Passed 🐌 1.09 x slower
* lapjvxa : ✅ Passed 🏆 1.27 x faster
* lapjvs : ✅ Passed 🐌 1.67 x slower
* lapjvsa : ✅ Passed 🏆 1.99 x faster
* lapjvc : ✅ Passed 🐌 1.83 x slower
* lapjv : ✅ Passed 🐌 4.14 x slower
* lapjvx : ✅ Passed 🐌 1.83 x slower
* lapjvxa : ✅ Passed 🐌 1.59 x slower
* lapjvs : ✅ Passed 🐌 2.01 x slower
* lapjvsa : ✅ Passed 🏆 1.32 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvsa : 0.00000308s
2. lapjvxa : 0.00000483s
3. scipy ⭐ : 0.00000613s
4. lapjvx : 0.00000667s
5. lapjvc : 0.00000925s
6. lapjv : 0.00001013s
7. lapjvs : 0.00001021s
1. lapjvsa : 0.00000283s
2. scipy ⭐ : 0.00000375s
3. lapjvxa : 0.00000596s
4. lapjvc : 0.00000687s
5. lapjvx : 0.00000687s
6. lapjvs : 0.00000754s
7. lapjv : 0.00001554s
-------------------------------

@@ -392,17 +407,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.95 x slower
* lapjv : ✅ Passed 🐌 3.42 x slower
* lapjvx : ✅ Passed 🐌 2.1 x slower
* lapjvxa : ✅ Passed 🐌 1.68 x slower
* lapjvs : ✅ Passed 🐌 3.23 x slower
* lapjvsa : ✅ Passed 🐌 4.63 x slower
* lapjvc : ✅ Passed 🐌 2.66 x slower
* lapjv : ✅ Passed 🐌 5.96 x slower
* lapjvx : ✅ Passed 🐌 3.03 x slower
* lapjvxa : ✅ Passed 🐌 2.6 x slower
* lapjvs : ✅ Passed 🐌 3.77 x slower
* lapjvsa : ✅ Passed 🐌 4.37 x slower
----- 🎉 SPEED RANKING 🎉 -----
1. scipy ⭐ : 0.00000429s
2. lapjvxa : 0.00000721s
3. lapjvc : 0.00000837s
4. lapjvx : 0.00000900s
5. lapjvs : 0.00001387s
6. lapjv : 0.00001467s
7. lapjvsa : 0.00001988s
1. scipy ⭐ : 0.00000292s
2. lapjvxa : 0.00000758s
3. lapjvc : 0.00000775s
4. lapjvx : 0.00000883s
5. lapjvs : 0.00001100s
6. lapjvsa : 0.00001275s
7. lapjv : 0.00001738s
-------------------------------

@@ -413,17 +428,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.49 x slower
* lapjv : ✅ Passed 🏆 1.7 x faster
* lapjvx : ✅ Passed 🏆 2.13 x faster
* lapjvxa : ✅ Passed 🏆 2.67 x faster
* lapjvs : ✅ Passed 🏆 1.8 x faster
* lapjvsa : ✅ Passed 🏆 1.89 x faster
* lapjvc : ✅ Passed 🐌 2.19 x slower
* lapjv : ✅ Passed 🏆 1.34 x faster
* lapjvx : ✅ Passed 🏆 1.83 x faster
* lapjvxa : ✅ Passed 🏆 2.24 x faster
* lapjvs : ✅ Passed 🏆 1.74 x faster
* lapjvsa : ✅ Passed 🏆 1.22 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.00002146s
2. lapjvx : 0.00002683s
3. lapjvsa : 0.00003033s
4. lapjvs : 0.00003183s
5. lapjv : 0.00003358s
6. scipy ⭐ : 0.00005721s
7. lapjvc : 0.00008517s
1. lapjvxa : 0.00001796s
2. lapjvx : 0.00002200s
3. lapjvs : 0.00002308s
4. lapjv : 0.00002992s
5. lapjvsa : 0.00003283s
6. scipy ⭐ : 0.00004017s
7. lapjvc : 0.00008783s
-------------------------------

@@ -434,17 +449,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🏆 1.41 x faster
* lapjv : ✅ Passed 🏆 3.95 x faster
* lapjvx : ✅ Passed 🏆 4.4 x faster
* lapjvxa : ✅ Passed 🏆 6.29 x faster
* lapjvs : ✅ Passed 🏆 3.59 x faster
* lapjvsa : ✅ Passed 🏆 6.65 x faster
* lapjvc : ✅ Passed 🏆 1.16 x faster
* lapjv : ✅ Passed 🏆 2.13 x faster
* lapjvx : ✅ Passed 🏆 2.69 x faster
* lapjvxa : ✅ Passed 🏆 3.29 x faster
* lapjvs : ✅ Passed 🏆 2.35 x faster
* lapjvsa : ✅ Passed 🏆 3.41 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvsa : 0.00001100s
2. lapjvxa : 0.00001162s
3. lapjvx : 0.00001662s
4. lapjv : 0.00001850s
5. lapjvs : 0.00002038s
6. lapjvc : 0.00005183s
7. scipy ⭐ : 0.00007312s
1. lapjvsa : 0.00002146s
2. lapjvxa : 0.00002221s
3. lapjvx : 0.00002721s
4. lapjvs : 0.00003108s
5. lapjv : 0.00003437s
6. lapjvc : 0.00006300s
7. scipy ⭐ : 0.00007317s
-------------------------------

@@ -455,17 +470,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 2.17 x slower
* lapjv : ✅ Passed 🏆 1.56 x faster
* lapjvx : ✅ Passed 🏆 1.73 x faster
* lapjvxa : ✅ Passed 🏆 2.29 x faster
* lapjvs : ✅ Passed 🏆 1.46 x faster
* lapjvsa : ✅ Passed 🏆 1.4 x faster
* lapjvc : ✅ Passed 🐌 2.31 x slower
* lapjv : ✅ Passed 🏆 1.15 x faster
* lapjvx : ✅ Passed 🏆 1.54 x faster
* lapjvxa : ✅ Passed 🏆 1.89 x faster
* lapjvs : ✅ Passed 🏆 1.47 x faster
* lapjvsa : ✅ Passed 🏆 1.53 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.00001733s
2. lapjvx : 0.00002292s
3. lapjv : 0.00002546s
4. lapjvs : 0.00002713s
5. lapjvsa : 0.00002838s
6. scipy ⭐ : 0.00003962s
7. lapjvc : 0.00008579s
1. lapjvxa : 0.00001904s
2. lapjvx : 0.00002342s
3. lapjvsa : 0.00002350s
4. lapjvs : 0.00002458s
5. lapjv : 0.00003133s
6. scipy ⭐ : 0.00003604s
7. lapjvc : 0.00008329s
-------------------------------

@@ -476,17 +491,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 5.08 x slower
* lapjv : ✅ Passed 🏆 1.44 x faster
* lapjvx : ✅ Passed 🏆 1.95 x faster
* lapjvxa : ✅ Passed 🏆 3.47 x faster
* lapjvs : ✅ Passed 🏆 2.22 x faster
* lapjvsa : ✅ Passed 🏆 2.18 x faster
* lapjvc : ✅ Passed 🐌 6.57 x slower
* lapjv : ✅ Passed 🏆 3.31 x faster
* lapjvx : ✅ Passed 🏆 3.69 x faster
* lapjvxa : ✅ Passed 🏆 3.81 x faster
* lapjvs : ✅ Passed 🏆 2.86 x faster
* lapjvsa : ✅ Passed 🏆 3.05 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.00098750s
2. lapjvs : 0.00154171s
3. lapjvsa : 0.00157042s
4. lapjvx : 0.00175387s
5. lapjv : 0.00237538s
6. scipy ⭐ : 0.00342258s
7. lapjvc : 0.01738238s
1. lapjvxa : 0.00073746s
2. lapjvx : 0.00076150s
3. lapjv : 0.00085017s
4. lapjvsa : 0.00092200s
5. lapjvs : 0.00098329s
6. scipy ⭐ : 0.00281312s
7. lapjvc : 0.01849563s
-------------------------------

@@ -497,17 +512,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 1.23 x slower
* lapjv : ✅ Passed 🏆 3.51 x faster
* lapjvx : ✅ Passed 🏆 3.61 x faster
* lapjvxa : ✅ Passed 🏆 3.69 x faster
* lapjvs : ✅ Passed 🏆 3.29 x faster
* lapjvsa : ✅ Passed 🏆 3.42 x faster
* lapjvc : ✅ Passed 🐌 1.37 x slower
* lapjv : ✅ Passed 🏆 1.09 x faster
* lapjvx : ✅ Passed 🏆 1.11 x faster
* lapjvxa : ✅ Passed 🏆 1.18 x faster
* lapjvs : ✅ Passed 🏆 1.06 x faster
* lapjvsa : ✅ Passed 🏆 1.04 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.00192367s
2. lapjvx : 0.00196654s
3. lapjv : 0.00201971s
4. lapjvsa : 0.00207583s
5. lapjvs : 0.00215921s
6. scipy ⭐ : 0.00709604s
7. lapjvc : 0.00875521s
1. lapjvxa : 0.00629000s
2. lapjvx : 0.00669658s
3. lapjv : 0.00680446s
4. lapjvs : 0.00702367s
5. lapjvsa : 0.00716958s
6. scipy ⭐ : 0.00744075s
7. lapjvc : 0.01022246s
-------------------------------

@@ -518,17 +533,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 5.09 x slower
* lapjv : ✅ Passed 🏆 3.0 x faster
* lapjvx : ✅ Passed 🏆 3.6 x faster
* lapjvxa : ✅ Passed 🏆 3.71 x faster
* lapjvs : ✅ Passed 🏆 2.83 x faster
* lapjvsa : ✅ Passed 🏆 2.74 x faster
* lapjvc : ✅ Passed 🐌 5.87 x slower
* lapjv : ✅ Passed 🏆 2.05 x faster
* lapjvx : ✅ Passed 🏆 3.86 x faster
* lapjvxa : ✅ Passed 🏆 2.83 x faster
* lapjvs : ✅ Passed 🏆 3.29 x faster
* lapjvsa : ✅ Passed 🏆 3.35 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.00110967s
2. lapjvx : 0.00114104s
3. lapjv : 0.00137046s
4. lapjvs : 0.00145308s
5. lapjvsa : 0.00150308s
6. scipy ⭐ : 0.00411283s
7. lapjvc : 0.02093913s
1. lapjvx : 0.00102792s
2. lapjvsa : 0.00118358s
3. lapjvs : 0.00120550s
4. lapjvxa : 0.00140042s
5. lapjv : 0.00192958s
6. scipy ⭐ : 0.00396425s
7. lapjvc : 0.02326779s
-------------------------------

@@ -539,17 +554,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 199.7 x slower
* lapjv : ✅ Passed 🐌 1.48 x slower
* lapjvx : ✅ Passed 🏆 1.17 x faster
* lapjvxa : ✅ Passed 🏆 1.08 x faster
* lapjvs : ✅ Passed 🐌 1.75 x slower
* lapjvsa : ✅ Passed 🐌 1.58 x slower
* lapjvc : ✅ Passed 🐌 349.67 x slower
* lapjv : ✅ Passed 🐌 3.52 x slower
* lapjvx : ✅ Passed 🐌 1.43 x slower
* lapjvxa : ✅ Passed 🐌 1.45 x slower
* lapjvs : ✅ Passed 🐌 3.18 x slower
* lapjvsa : ✅ Passed 🐌 2.84 x slower
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvx : 0.09995704s
2. lapjvxa : 0.10858558s
3. scipy ⭐ : 0.11726183s
4. lapjv : 0.17386667s
5. lapjvsa : 0.18564625s
6. lapjvs : 0.20479308s
7. lapjvc : 23.41745225s
1. scipy ⭐ : 0.07873675s
2. lapjvx : 0.11293429s
3. lapjvxa : 0.11401713s
4. lapjvsa : 0.22370100s
5. lapjvs : 0.25039458s
6. lapjv : 0.27737229s
7. lapjvc : 27.53160242s
-------------------------------

@@ -560,17 +575,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 2.01 x slower
* lapjv : ✅ Passed 🏆 1.21 x faster
* lapjvx : ✅ Passed 🏆 1.25 x faster
* lapjvxa : ✅ Passed 🏆 1.19 x faster
* lapjvs : ✅ Passed 🏆 1.73 x faster
* lapjvsa : ✅ Passed 🏆 1.84 x faster
* lapjvc : ✅ Passed 🐌 1.35 x slower
* lapjv : ✅ Passed 🏆 2.36 x faster
* lapjvx : ✅ Passed 🏆 2.15 x faster
* lapjvxa : ✅ Passed 🏆 2.23 x faster
* lapjvs : ✅ Passed 🏆 2.52 x faster
* lapjvsa : ✅ Passed 🏆 2.89 x faster
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvsa : 0.92814183s
2. lapjvs : 0.98585971s
3. lapjvx : 1.36841413s
4. lapjv : 1.41392200s
5. lapjvxa : 1.43138329s
6. scipy ⭐ : 1.70584088s
7. lapjvc : 3.43550417s
1. lapjvsa : 0.90473763s
2. lapjvs : 1.03581571s
3. lapjv : 1.10758192s
4. lapjvxa : 1.17104621s
5. lapjvx : 1.21566396s
6. scipy ⭐ : 2.61083117s
7. lapjvc : 3.53453567s
-------------------------------

@@ -581,17 +596,17 @@

-----------------------------------------
* lapjvc : ✅ Passed 🐌 283.01 x slower
* lapjv : ✅ Passed 🐌 2.24 x slower
* lapjvx : ✅ Passed 🏆 1.39 x faster
* lapjvxa : ✅ Passed 🏆 2.27 x faster
* lapjvs : ✅ Passed 🐌 1.5 x slower
* lapjvsa : ✅ Passed 🐌 1.79 x slower
* lapjvc : ✅ Passed 🐌 308.42 x slower
* lapjv : ✅ Passed 🐌 2.1 x slower
* lapjvx : ✅ Passed 🏆 1.32 x faster
* lapjvxa : ✅ Passed 🏆 1.87 x faster
* lapjvs : ✅ Passed 🐌 2.3 x slower
* lapjvsa : ✅ Passed 🐌 1.98 x slower
----- 🎉 SPEED RANKING 🎉 -----
1. lapjvxa : 0.16666625s
2. lapjvx : 0.27308525s
3. scipy ⭐ : 0.37829733s
4. lapjvs : 0.56853200s
5. lapjvsa : 0.67584400s
6. lapjv : 0.84921917s
7. lapjvc : 107.06137171s
1. lapjvxa : 0.18422821s
2. lapjvx : 0.26109171s
3. scipy ⭐ : 0.34502992s
4. lapjvsa : 0.68365575s
5. lapjv : 0.72371579s
6. lapjvs : 0.79274062s
7. lapjvc : 106.41484879s
-------------------------------

@@ -602,3 +617,3 @@ ```

👁️ See newer benchmark results on all platforms [here on GitHub](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml).
👁️ See newer benchmark results on all platforms [here on GitHub](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml).

@@ -609,3 +624,3 @@ ## 🕵️‍♂️ Other Benchmarks

This [benchmark_tracking.py](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_tracking.py) is specifically designed for Object Tracking applications, with [SciPy](https://pypi.org/project/scipy/) as the baseline.
This [benchmark_tracking.py](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_tracking.py) is specifically designed for ***Object Tracking*** applications, with [SciPy](https://pypi.org/project/scipy/) as the baseline.

@@ -616,7 +631,7 @@ ```

git clone https://github.com/rathaROG/lapx.git
cd lapx/.github/test
cd lapx/benchmarks
python benchmark_tracking.py
```
As shown in the updated benchmark results below, the new function [`lapjvx()`](https://github.com/rathaROG/lapx#2-the-new-function-lapjvx) (LAPX LAPJVX in the tables) and the original [`lapjv()`](https://github.com/rathaROG/lapx#1-the-original-function-lapjv) (LAPX LAPJV in the tables) consistently match the baseline outputs of SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html), as indicated by “✓” and ✅ in the tables.
As shown in the benchmark results below, the new function [`lapjvx()`](https://github.com/rathaROG/lapx#2-the-new-function-lapjvx) (LAPX LAPJVX in the tables) and the original [`lapjv()`](https://github.com/rathaROG/lapx#1-the-original-function-lapjv) (LAPX LAPJV in the tables) consistently match the baseline outputs of SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html), as indicated by “✓” and ✅ in the tables.

@@ -627,9 +642,19 @@ In most scenarios, `lapjvx()` and `lapjv()` demonstrate faster performance than the baseline SciPy's `linear_sum_assignment`, and they remain competitive with other LAPX variants such as [`lapjvc`](https://github.com/rathaROG/lapx#4-the-new-function-lapjvc) (LAPX LAPJVC in the tables). When in-function filtering with `cost_limit` is used, `lapjv()` (LAPX LAPJV-IFT in the tables) experiences a significant performance impact and can produce different outputs compared to SciPy's baseline, as indicated by “✗” and ⚠️ in the tables.

💡 To achieve optimal performance of `lapjvx()` or `lapjv()` in object tracking application, follow the implementation in the current [`benchmark_tracking.py`](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_tracking.py) script.
💡 To achieve optimal performance of `lapjvx()` or `lapjv()` in object tracking application, follow the implementation in the current [`benchmark_tracking.py`](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_tracking.py) script.
<details><summary>📊 Show the results:</summary>
<details><summary>📊 Show the results:</summary><br>
https://github.com/rathaROG/lapx/actions/runs/18830580672/job/53721233510
Run on my local Windows 11 i9-13900KS (8 p-core + 8 e-core) + python 3.11.9
```
numpy==2.2.6
scipy==1.16.3
lapx @ git+https://github.com/rathaROG/lapx.git@ca0bbee8e319fe005c557d5a2bcce1148d89797c
```
```
Microsoft Windows [Version 10.0.26200.7019]
(c) Microsoft Corporation. All rights reserved.
D:\DEV\lapx_all\tmp\lapx\benchmarks>python benchmark_tracking.py
#################################################################

@@ -640,13 +665,13 @@ # Benchmark with threshold (cost_limit) = 0.05

-----------------------------------------------------------------------------------------------------------------------
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
-----------------------------------------------------------------------------------------------------------------------
10x10 | 0.000325s 6th | 0.000137s ✗ 1st | 0.000168s ✓ 3rd | 0.000177s ✓ 4th | 0.000206s ✓ 5th | 0.000162s ✓ 2nd
25x20 | 0.000170s 5th | 0.000191s ✗ 6th | 0.000155s ✓ 1st | 0.000170s ✓ 4th | 0.000162s ✓ 3rd | 0.000160s ✓ 2nd
50x50 | 0.000265s 6th | 0.000214s ✗ 4th | 0.000190s ✓ 1st | 0.000193s ✓ 2nd | 0.000246s ✓ 5th | 0.000194s ✓ 3rd
100x150 | 0.000453s 4th | 0.001335s ✓ 6th | 0.000402s ✓ 3rd | 0.000396s ✓ 2nd | 0.001067s ✓ 5th | 0.000330s ✓ 1st
250x250 | 0.002854s 5th | 0.002952s ✓ 6th | 0.001731s ✓ 3rd | 0.001559s ✓ 2nd | 0.001977s ✓ 4th | 0.001536s ✓ 1st
550x500 | 0.008365s 1st | 0.064973s ✓ 6th | 0.012927s ✓ 4th | 0.011949s ✓ 3rd | 0.030009s ✓ 5th | 0.011664s ✓ 2nd
1000x1000 | 0.051245s 2nd | 0.111529s ✓ 6th | 0.057231s ✓ 5th | 0.055981s ✓ 4th | 0.044361s ✓ 1st | 0.055155s ✓ 3rd
2000x2500 | 0.075957s 4th | 3.645535s ✓ 6th | 0.020558s ✓ 1st | 0.020706s ✓ 2nd | 2.975010s ✓ 5th | 0.034655s ✓ 3rd
5000x5000 | 2.114572s 5th | 2.563577s ✓ 6th | 1.214149s ✓ 2nd | 1.219787s ✓ 3rd | 1.844269s ✓ 4th | 0.959447s ✓ 1st
10x10 | 0.000063s 4th | 0.000057s ✓ 2nd | 0.000063s ✓ 5th | 0.000069s ✓ 6th | 0.000061s ✓ 3rd | 0.000057s ✓ 1st
25x20 | 0.000058s 4th | 0.000104s ✗ 6th | 0.000062s ✓ 5th | 0.000052s ✓ 2nd | 0.000058s ✓ 3rd | 0.000051s ✓ 1st
50x50 | 0.000083s 4th | 0.000086s ✗ 5th | 0.000068s ✓ 3rd | 0.000058s ✓ 1st | 0.000103s ✓ 6th | 0.000063s ✓ 2nd
100x150 | 0.000131s 2nd | 0.000828s ✗ 6th | 0.000132s ✓ 3rd | 0.000144s ✓ 4th | 0.000680s ✓ 5th | 0.000123s ✓ 1st
250x250 | 0.001126s 4th | 0.001218s ✓ 5th | 0.000557s ✓ 2nd | 0.000537s ✓ 1st | 0.001516s ✓ 6th | 0.000605s ✓ 3rd
550x500 | 0.003531s 4th | 0.011714s ✓ 5th | 0.001424s ✓ 2nd | 0.001358s ✓ 1st | 0.017545s ✓ 6th | 0.001511s ✓ 3rd
1000x1000 | 0.022934s 4th | 0.026359s ✓ 5th | 0.010415s ✓ 2nd | 0.010320s ✓ 1st | 0.031669s ✓ 6th | 0.012068s ✓ 3rd
2000x2500 | 0.034198s 4th | 1.627013s ✓ 6th | 0.013647s ✓ 1st | 0.015660s ✓ 2nd | 1.531048s ✓ 5th | 0.022275s ✓ 3rd
5000x5000 | 1.095034s 3rd | 2.335637s ✓ 6th | 1.082954s ✓ 2nd | 1.103870s ✓ 4th | 1.140890s ✓ 5th | 0.496765s ✓ 1st
-----------------------------------------------------------------------------------------------------------------------

@@ -656,10 +681,10 @@

🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 1063.3028 ms | ✅ | 🥇x3 🥈x3 🥉x3
2. LAPX LAPJV : 1307.5116 ms | ✅ | 🥇x3 🥈x1 🥉x3 🚩x1 🏳️x1
3. LAPX LAPJVX : 1310.9174 ms | ✅ | 🥈x4 🥉x2 🚩x3
4. BASELINE SciPy : 2254.2068 ms | ⭐ | 🥇x1 🥈x1 🚩x2 🏳️x3 🥴x2
5. LAPX LAPJVC : 4897.3061 ms | ✅ | 🥇x1 🥉x1 🚩x2 🏳️x5
6. LAPX LAPJV-IFT : 6390.4416 ms | ⚠️ | 🥇x1 🚩x1 🥴x7
🎉 ------------------------------------------------------------------------- 🎉
🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 533.5186 ms | ✅ | 🥇x4 🥈x1 🥉x4
2. LAPX LAPJV : 1109.3228 ms | ✅ | 🥇x1 🥈x4 🥉x2 🏳️x2
3. LAPX LAPJVX : 1132.0674 ms | ✅ | 🥇x4 🥈x2 🚩x2 🥴x1
4. BASELINE SciPy : 1157.1577 ms | ⭐ | 🥈x1 🥉x1 🚩x7
5. LAPX LAPJVC : 2723.5708 ms | ✅ | 🥉x2 🏳️x3 🥴x4
6. LAPX LAPJV-IFT : 4003.0145 ms | ⚠️ | 🥈x1 🏳️x4 🥴x4
🎉 ------------------------------------------------------------------------- 🎉

@@ -672,13 +697,13 @@

-----------------------------------------------------------------------------------------------------------------------
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
-----------------------------------------------------------------------------------------------------------------------
10x10 | 0.000188s 6th | 0.000157s ✗ 5th | 0.000130s ✓ 3rd | 0.000129s ✓ 1st | 0.000139s ✓ 4th | 0.000129s ✓ 2nd
25x20 | 0.000152s 3rd | 0.000171s ✗ 6th | 0.000148s ✓ 1st | 0.000149s ✓ 2nd | 0.000159s ✓ 4th | 0.000160s ✓ 5th
50x50 | 0.000245s 6th | 0.000230s ✗ 4th | 0.000188s ✓ 1st | 0.000194s ✓ 2nd | 0.000231s ✓ 5th | 0.000197s ✓ 3rd
100x150 | 0.000417s 4th | 0.001254s ✓ 6th | 0.000334s ✓ 2nd | 0.000333s ✓ 1st | 0.000887s ✓ 5th | 0.000349s ✓ 3rd
250x250 | 0.002642s 5th | 0.003365s ✓ 6th | 0.001734s ✓ 2nd | 0.001751s ✓ 3rd | 0.002294s ✓ 4th | 0.001708s ✓ 1st
550x500 | 0.007055s 1st | 0.127557s ✓ 6th | 0.011708s ✓ 4th | 0.011700s ✓ 3rd | 0.040566s ✓ 5th | 0.011671s ✓ 2nd
1000x1000 | 0.045616s 5th | 0.085374s ✓ 6th | 0.041577s ✓ 3rd | 0.041732s ✓ 4th | 0.040828s ✓ 1st | 0.041053s ✓ 2nd
2000x2500 | 0.075874s 4th | 3.594363s ✓ 6th | 0.020592s ✓ 2nd | 0.020426s ✓ 1st | 2.840181s ✓ 5th | 0.024470s ✓ 3rd
5000x5000 | 2.493812s 5th | 3.415118s ✓ 6th | 1.651438s ✓ 3rd | 1.646662s ✓ 2nd | 2.010495s ✓ 4th | 0.994217s ✓ 1st
10x10 | 0.000048s 2nd | 0.000055s ✗ 5th | 0.000048s ✓ 3rd | 0.000039s ✓ 1st | 0.000054s ✓ 4th | 0.000055s ✓ 6th
25x20 | 0.000048s 3rd | 0.000058s ✗ 6th | 0.000055s ✓ 4th | 0.000047s ✓ 2nd | 0.000055s ✓ 5th | 0.000047s ✓ 1st
50x50 | 0.000077s 4th | 0.000080s ✗ 6th | 0.000057s ✓ 3rd | 0.000048s ✓ 1st | 0.000078s ✓ 5th | 0.000051s ✓ 2nd
100x150 | 0.000112s 3rd | 0.000635s ✓ 6th | 0.000123s ✓ 4th | 0.000092s ✓ 1st | 0.000588s ✓ 5th | 0.000093s ✓ 2nd
250x250 | 0.000991s 4th | 0.001352s ✓ 6th | 0.000536s ✓ 1st | 0.000536s ✓ 2nd | 0.001200s ✓ 5th | 0.000591s ✓ 3rd
550x500 | 0.003480s 4th | 0.010844s ✓ 5th | 0.001426s ✓ 2nd | 0.001311s ✓ 1st | 0.016003s ✓ 6th | 0.001447s ✓ 3rd
1000x1000 | 0.023240s 4th | 0.026984s ✓ 5th | 0.009923s ✓ 2nd | 0.009682s ✓ 1st | 0.027498s ✓ 6th | 0.011329s ✓ 3rd
2000x2500 | 0.034578s 4th | 1.563681s ✓ 5th | 0.014135s ✓ 2nd | 0.014121s ✓ 1st | 1.596397s ✓ 6th | 0.022706s ✓ 3rd
5000x5000 | 1.070328s 2nd | 3.315799s ✓ 6th | 1.622128s ✓ 4th | 1.628149s ✓ 5th | 1.100956s ✓ 3rd | 0.537018s ✓ 1st
-----------------------------------------------------------------------------------------------------------------------

@@ -688,10 +713,10 @@

🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 1073.9525 ms | ✅ | 🥇x2 🥈x3 🥉x3 🏳️x1
2. LAPX LAPJVX : 1723.0752 ms | ✅ | 🥇x3 🥈x3 🥉x2 🚩x1
3. LAPX LAPJV : 1727.8499 ms | ✅ | 🥇x2 🥈x3 🥉x3 🚩x1
4. BASELINE SciPy : 2626.0008 ms | ⭐ | 🥇x1 🥉x1 🚩x2 🏳️x3 🥴x2
5. LAPX LAPJVC : 4935.7815 ms | ✅ | 🥇x1 🚩x4 🏳️x4
6. LAPX LAPJV-IFT : 7227.5912 ms | ⚠️ | 🚩x1 🏳️x1 🥴x7
🎉 ------------------------------------------------------------------------- 🎉
🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 573.3374 ms | ✅ | 🥇x2 🥈x2 🥉x4 🥴x1
2. BASELINE SciPy : 1132.9018 ms | ⭐ | 🥈x2 🥉x2 🚩x5
3. LAPX LAPJV : 1648.4320 ms | ✅ | 🥇x1 🥈x3 🥉x2 🚩x3
4. LAPX LAPJVX : 1654.0251 ms | ✅ | 🥇x6 🥈x2 🏳️x1
5. LAPX LAPJVC : 2742.8296 ms | ✅ | 🥉x1 🚩x1 🏳️x4 🥴x3
6. LAPX LAPJV-IFT : 4919.4888 ms | ⚠️ | 🏳️x4 🥴x5
🎉 ------------------------------------------------------------------------- 🎉

@@ -704,13 +729,13 @@

-----------------------------------------------------------------------------------------------------------------------
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
-----------------------------------------------------------------------------------------------------------------------
10x10 | 0.000218s 6th | 0.000129s ✓ 1st | 0.000131s ✓ 3rd | 0.000154s ✓ 5th | 0.000139s ✓ 4th | 0.000131s ✓ 2nd
25x20 | 0.000150s 1st | 0.000178s ✓ 6th | 0.000151s ✓ 2nd | 0.000158s ✓ 4th | 0.000163s ✓ 5th | 0.000155s ✓ 3rd
50x50 | 0.000211s 5th | 0.000194s ✓ 3rd | 0.000283s ✓ 6th | 0.000180s ✓ 1st | 0.000194s ✓ 2nd | 0.000197s ✓ 4th
100x150 | 0.000404s 4th | 0.001266s ✓ 6th | 0.000344s ✓ 3rd | 0.000318s ✓ 1st | 0.000955s ✓ 5th | 0.000341s ✓ 2nd
250x250 | 0.002778s 6th | 0.002573s ✓ 5th | 0.001292s ✓ 2nd | 0.001324s ✓ 3rd | 0.001683s ✓ 4th | 0.001267s ✓ 1st
550x500 | 0.007734s 1st | 0.238647s ✓ 6th | 0.012039s ✓ 4th | 0.011927s ✓ 3rd | 0.040219s ✓ 5th | 0.011922s ✓ 2nd
1000x1000 | 0.046291s 5th | 0.075969s ✓ 6th | 0.036951s ✓ 2nd | 0.037461s ✓ 3rd | 0.039884s ✓ 4th | 0.020911s ✓ 1st
2000x2500 | 0.076470s 4th | 3.556511s ✓ 6th | 0.020127s ✓ 1st | 0.020713s ✓ 2nd | 2.866433s ✓ 5th | 0.023518s ✓ 3rd
5000x5000 | 2.853023s 5th | 2.870481s ✓ 6th | 1.372504s ✓ 3rd | 1.367633s ✓ 2nd | 1.949158s ✓ 4th | 1.205538s ✓ 1st
10x10 | 0.000051s 6th | 0.000045s ✓ 5th | 0.000045s ✓ 4th | 0.000040s ✓ 2nd | 0.000043s ✓ 3rd | 0.000039s ✓ 1st
25x20 | 0.000043s 1st | 0.000055s ✓ 6th | 0.000054s ✓ 4th | 0.000046s ✓ 2nd | 0.000054s ✓ 5th | 0.000046s ✓ 3rd
50x50 | 0.000070s 4th | 0.000076s ✓ 5th | 0.000060s ✓ 3rd | 0.000049s ✓ 1st | 0.000089s ✓ 6th | 0.000054s ✓ 2nd
100x150 | 0.000113s 4th | 0.000646s ✓ 6th | 0.000103s ✓ 3rd | 0.000095s ✓ 2nd | 0.000616s ✓ 5th | 0.000095s ✓ 1st
250x250 | 0.001064s 4th | 0.001522s ✓ 6th | 0.000643s ✓ 2nd | 0.000591s ✓ 1st | 0.001448s ✓ 5th | 0.000673s ✓ 3rd
550x500 | 0.003672s 4th | 0.010797s ✓ 5th | 0.001429s ✓ 2nd | 0.001405s ✓ 1st | 0.015196s ✓ 6th | 0.001497s ✓ 3rd
1000x1000 | 0.019571s 4th | 0.027457s ✓ 6th | 0.010368s ✓ 1st | 0.011375s ✓ 3rd | 0.024061s ✓ 5th | 0.010495s ✓ 2nd
2000x2500 | 0.038530s 4th | 1.654156s ✓ 6th | 0.015500s ✓ 2nd | 0.014464s ✓ 1st | 1.561805s ✓ 5th | 0.022967s ✓ 3rd
5000x5000 | 0.969325s 5th | 1.507703s ✓ 6th | 0.668259s ✓ 3rd | 0.656468s ✓ 2nd | 0.954102s ✓ 4th | 0.475278s ✓ 1st
-----------------------------------------------------------------------------------------------------------------------

@@ -720,10 +745,10 @@

🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 1263.9814 ms | ✅ | 🥇x3 🥈x3 🥉x2 🚩x1
2. LAPX LAPJVX : 1439.8675 ms | ✅ | 🥇x2 🥈x2 🥉x3 🚩x1 🏳️x1
3. LAPX LAPJV : 1443.8227 ms | ✅ | 🥇x1 🥈x3 🥉x3 🚩x1 🥴x1
4. BASELINE SciPy : 2987.2792 ms | ⭐ | 🥇x2 🚩x2 🏳️x3 🥴x2
5. LAPX LAPJVC : 4898.8268 ms | ✅ | 🥈x1 🚩x4 🏳️x4
6. LAPX LAPJV-IFT : 6745.9482 ms | ✅ | 🥇x1 🥉x1 🏳️x1 🥴x6
🎉 ------------------------------------------------------------------------- 🎉
🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 511.1457 ms | ✅ | 🥇x3 🥈x2 🥉x4
2. LAPX LAPJVX : 684.5318 ms | ✅ | 🥇x4 🥈x4 🥉x1
3. LAPX LAPJV : 696.4601 ms | ✅ | 🥇x1 🥈x3 🥉x3 🚩x2
4. BASELINE SciPy : 1032.4388 ms | ⭐ | 🥇x1 🚩x6 🏳️x1 🥴x1
5. LAPX LAPJVC : 2557.4136 ms | ✅ | 🥉x1 🚩x1 🏳️x5 🥴x2
6. LAPX LAPJV-IFT : 3202.4579 ms | ✅ | 🏳️x3 🥴x6
🎉 ------------------------------------------------------------------------- 🎉

@@ -736,13 +761,13 @@

-----------------------------------------------------------------------------------------------------------------------
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
-----------------------------------------------------------------------------------------------------------------------
10x10 | 0.000233s 6th | 0.000127s ✓ 1st | 0.000158s ✓ 5th | 0.000128s ✓ 2nd | 0.000142s ✓ 4th | 0.000129s ✓ 3rd
25x20 | 0.000144s 1st | 0.000165s ✓ 5th | 0.000172s ✓ 6th | 0.000159s ✓ 4th | 0.000155s ✓ 3rd | 0.000148s ✓ 2nd
50x50 | 0.000269s 6th | 0.000203s ✓ 3rd | 0.000184s ✓ 1st | 0.000191s ✓ 2nd | 0.000223s ✓ 4th | 0.000254s ✓ 5th
100x150 | 0.000417s 4th | 0.001233s ✓ 6th | 0.000308s ✓ 1st | 0.000356s ✓ 3rd | 0.001072s ✓ 5th | 0.000326s ✓ 2nd
250x250 | 0.002866s 5th | 0.003220s ✓ 6th | 0.001664s ✓ 1st | 0.001702s ✓ 3rd | 0.002252s ✓ 4th | 0.001676s ✓ 2nd
550x500 | 0.008314s 1st | 0.249585s ✓ 6th | 0.011429s ✓ 2nd | 0.011442s ✓ 3rd | 0.030332s ✓ 5th | 0.011470s ✓ 4th
1000x1000 | 0.046902s 5th | 0.080581s ✓ 6th | 0.039630s ✓ 2nd | 0.039960s ✓ 3rd | 0.045573s ✓ 4th | 0.038703s ✓ 1st
2000x2500 | 0.074322s 4th | 3.490300s ✓ 6th | 0.020954s ✓ 1st | 0.021199s ✓ 2nd | 2.761126s ✓ 5th | 0.024095s ✓ 3rd
5000x5000 | 2.614220s 5th | 4.553976s ✓ 6th | 2.238387s ✓ 3rd | 2.240301s ✓ 4th | 1.912648s ✓ 2nd | 1.146587s ✓ 1st
10x10 | 0.000049s 6th | 0.000046s ✓ 4th | 0.000046s ✓ 5th | 0.000038s ✓ 1st | 0.000044s ✓ 3rd | 0.000041s ✓ 2nd
25x20 | 0.000040s 1st | 0.000055s ✓ 6th | 0.000052s ✓ 5th | 0.000043s ✓ 2nd | 0.000051s ✓ 4th | 0.000045s ✓ 3rd
50x50 | 0.000067s 4th | 0.000074s ✓ 5th | 0.000058s ✓ 3rd | 0.000053s ✓ 2nd | 0.000081s ✓ 6th | 0.000053s ✓ 1st
100x150 | 0.000117s 2nd | 0.000752s ✓ 6th | 0.000123s ✓ 3rd | 0.000126s ✓ 4th | 0.000721s ✓ 5th | 0.000098s ✓ 1st
250x250 | 0.001063s 4th | 0.001545s ✓ 6th | 0.000447s ✓ 2nd | 0.000445s ✓ 1st | 0.001303s ✓ 5th | 0.000477s ✓ 3rd
550x500 | 0.003711s 4th | 0.011309s ✓ 5th | 0.001524s ✓ 2nd | 0.001460s ✓ 1st | 0.016480s ✓ 6th | 0.001558s ✓ 3rd
1000x1000 | 0.019167s 1st | 0.053561s ✓ 6th | 0.025616s ✓ 3rd | 0.025778s ✓ 4th | 0.023447s ✓ 2nd | 0.027353s ✓ 5th
2000x2500 | 0.035676s 4th | 1.579856s ✓ 5th | 0.014502s ✓ 2nd | 0.014438s ✓ 1st | 1.699035s ✓ 6th | 0.023144s ✓ 3rd
5000x5000 | 1.214213s 5th | 1.230595s ✓ 6th | 0.511229s ✓ 2nd | 0.514490s ✓ 3rd | 1.144982s ✓ 4th | 0.452692s ✓ 1st
-----------------------------------------------------------------------------------------------------------------------

@@ -752,10 +777,10 @@

🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 1223.3865 ms | ✅ | 🥇x2 🥈x3 🥉x2 🚩x1 🏳️x1
2. LAPX LAPJV : 2312.8874 ms | ✅ | 🥇x4 🥈x2 🥉x1 🏳️x1 🥴x1
3. LAPX LAPJVX : 2315.4386 ms | ✅ | 🥈x3 🥉x4 🚩x2
4. BASELINE SciPy : 2747.6856 ms | ⭐ | 🥇x2 🚩x2 🏳️x3 🥴x2
5. LAPX LAPJVC : 4753.5226 ms | ✅ | 🥈x1 🥉x1 🚩x4 🏳️x3
6. LAPX LAPJV-IFT : 8379.3885 ms | ✅ | 🥇x1 🥉x1 🏳️x1 🥴x6
🎉 ------------------------------------------------------------------------- 🎉
🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 505.4603 ms | ✅ | 🥇x3 🥈x1 🥉x4 🏳️x1
2. LAPX LAPJV : 553.5970 ms | ✅ | 🥈x4 🥉x3 🏳️x2
3. LAPX LAPJVX : 556.8710 ms | ✅ | 🥇x4 🥈x2 🥉x1 🚩x2
4. BASELINE SciPy : 1274.1026 ms | ⭐ | 🥇x2 🥈x1 🚩x4 🏳️x1 🥴x1
5. LAPX LAPJV-IFT : 2877.7913 ms | ✅ | 🚩x1 🏳️x3 🥴x5
6. LAPX LAPJVC : 2886.1434 ms | ✅ | 🥈x1 🥉x1 🚩x2 🏳️x2 🥴x3
🎉 ------------------------------------------------------------------------- 🎉

@@ -768,13 +793,13 @@

-----------------------------------------------------------------------------------------------------------------------
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS
-----------------------------------------------------------------------------------------------------------------------
10x10 | 0.000210s 6th | 0.000136s ✓ 4th | 0.000133s ✓ 3rd | 0.000127s ✓ 1st | 0.000142s ✓ 5th | 0.000131s ✓ 2nd
25x20 | 0.000150s 3rd | 0.000203s ✓ 6th | 0.000145s ✓ 1st | 0.000148s ✓ 2nd | 0.000152s ✓ 4th | 0.000178s ✓ 5th
50x50 | 0.000243s 6th | 0.000239s ✓ 5th | 0.000194s ✓ 1st | 0.000201s ✓ 2nd | 0.000233s ✓ 4th | 0.000203s ✓ 3rd
100x150 | 0.000426s 4th | 0.001229s ✓ 6th | 0.000335s ✓ 3rd | 0.000329s ✓ 2nd | 0.001090s ✓ 5th | 0.000311s ✓ 1st
250x250 | 0.002309s 6th | 0.002270s ✓ 5th | 0.001100s ✓ 1st | 0.001198s ✓ 3rd | 0.001776s ✓ 4th | 0.001157s ✓ 2nd
550x500 | 0.007958s 1st | 0.236500s ✓ 6th | 0.012768s ✓ 3rd | 0.012651s ✓ 2nd | 0.039369s ✓ 5th | 0.012789s ✓ 4th
1000x1000 | 0.047147s 5th | 0.089055s ✓ 6th | 0.044446s ✓ 4th | 0.044309s ✓ 3rd | 0.043942s ✓ 2nd | 0.043357s ✓ 1st
2000x2500 | 0.078020s 4th | 3.430561s ✓ 6th | 0.021284s ✓ 1st | 0.021622s ✓ 2nd | 2.844436s ✓ 5th | 0.024936s ✓ 3rd
5000x5000 | 2.490763s 5th | 4.480608s ✓ 6th | 2.195783s ✓ 3rd | 2.198696s ✓ 4th | 2.009281s ✓ 2nd | 1.052344s ✓ 1st
10x10 | 0.000055s 6th | 0.000048s ✓ 5th | 0.000046s ✓ 4th | 0.000037s ✓ 1st | 0.000043s ✓ 3rd | 0.000040s ✓ 2nd
25x20 | 0.000043s 1st | 0.000059s ✓ 6th | 0.000053s ✓ 4th | 0.000045s ✓ 2nd | 0.000055s ✓ 5th | 0.000046s ✓ 3rd
50x50 | 0.000074s 4th | 0.000080s ✓ 5th | 0.000063s ✓ 3rd | 0.000054s ✓ 1st | 0.000088s ✓ 6th | 0.000058s ✓ 2nd
100x150 | 0.000146s 4th | 0.000647s ✓ 5th | 0.000107s ✓ 3rd | 0.000095s ✓ 1st | 0.000714s ✓ 6th | 0.000103s ✓ 2nd
250x250 | 0.000964s 4th | 0.001495s ✓ 6th | 0.000565s ✓ 1st | 0.000603s ✓ 2nd | 0.001220s ✓ 5th | 0.000636s ✓ 3rd
550x500 | 0.003138s 4th | 0.010879s ✓ 5th | 0.001294s ✓ 1st | 0.001329s ✓ 2nd | 0.016092s ✓ 6th | 0.001405s ✓ 3rd
1000x1000 | 0.020857s 3rd | 0.042133s ✓ 6th | 0.019502s ✓ 2nd | 0.019448s ✓ 1st | 0.023370s ✓ 5th | 0.021119s ✓ 4th
2000x2500 | 0.032293s 4th | 1.575432s ✓ 6th | 0.014037s ✓ 1st | 0.014037s ✓ 2nd | 1.482075s ✓ 5th | 0.022823s ✓ 3rd
5000x5000 | 0.974974s 4th | 1.340142s ✓ 6th | 0.564158s ✓ 2nd | 0.570803s ✓ 3rd | 1.116583s ✓ 5th | 0.442339s ✓ 1st
-----------------------------------------------------------------------------------------------------------------------

@@ -784,14 +809,14 @@

🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 1135.4053 ms | ✅ | 🥇x3 🥈x2 🥉x2 🚩x1 🏳️x1
2. LAPX LAPJV : 2276.1874 ms | ✅ | 🥇x4 🥉x4 🚩x1
3. LAPX LAPJVX : 2279.2815 ms | ✅ | 🥇x1 🥈x5 🥉x2 🚩x1
4. BASELINE SciPy : 2627.2261 ms | ⭐ | 🥇x1 🥉x1 🚩x2 🏳️x2 🥴x3
5. LAPX LAPJVC : 4940.4219 ms | ✅ | 🥈x2 🚩x3 🏳️x4
6. LAPX LAPJV-IFT : 8240.8002 ms | ✅ | 🚩x1 🏳️x2 🥴x6
🎉 ------------------------------------------------------------------------- 🎉
🎉 --------------------------- OVERALL RANKING --------------------------- 🎉
1. LAPX LAPJVS : 488.5671 ms | ✅ | 🥇x1 🥈x3 🥉x4 🚩x1
2. LAPX LAPJV : 599.8239 ms | ✅ | 🥇x3 🥈x2 🥉x2 🚩x2
3. LAPX LAPJVX : 606.4511 ms | ✅ | 🥇x4 🥈x4 🥉x1
4. BASELINE SciPy : 1032.5424 ms | ⭐ | 🥇x1 🥉x1 🚩x6 🥴x1
5. LAPX LAPJVC : 2640.2397 ms | ✅ | 🥉x1 🏳️x5 🥴x3
6. LAPX LAPJV-IFT : 2970.9133 ms | ✅ | 🏳️x4 🥴x5
🎉 ------------------------------------------------------------------------- 🎉
```
👁️ See more results on various platforms and architectures [here](https://github.com/rathaROG/lapx/actions/runs/18830580672).
👁️ See more results on various platforms and architectures [here](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml).
</details>

@@ -59,25 +59,64 @@ # Copyright (c) 2025 Ratha SIV | MIT License

__version__ = '0.8.1'
from typing import TYPE_CHECKING
import importlib
# Single-matrix solvers
from .lapmod import lapmod
from ._lapjv import (
lapjv,
LARGE_ as LARGE,
FP_1_ as FP_1,
FP_2_ as FP_2,
FP_DYNAMIC_ as FP_DYNAMIC
)
from ._lapjvx import lapjvx, lapjvxa
from ._lapjvc import lapjvc
from .lapjvs import lapjvs, lapjvsa
if TYPE_CHECKING:
# Single-matrix solvers
from ._lapmod_wp import lapmod
from ._lapjv_wp import lapjv
from ._lapjvx_wp import lapjvx, lapjvxa
from ._lapjvc_wp import lapjvc
from ._lapjvs_wp import lapjvs, lapjvsa
# Batch solvers
from ._lapjvx_batch_wp import lapjvx_batch, lapjvxa_batch
from ._lapjvs_batch_wp import lapjvs_batch, lapjvsa_batch
# Constants
from ._lapjv import ( # type: ignore
LARGE_ as LARGE,
FP_1_ as FP_1,
FP_2_ as FP_2,
FP_DYNAMIC_ as FP_DYNAMIC,
)
# Batch solvers
from .lapjvx_batch import lapjvx_batch, lapjvxa_batch
from .lapjvs_batch import lapjvs_batch, lapjvsa_batch
_exports = {
# Single-matrix solvers
'lapmod': ("lap._lapmod_wp", "lapmod"),
'lapjv': ("lap._lapjv_wp", "lapjv"),
'lapjvx': ("lap._lapjvx_wp", "lapjvx"),
'lapjvxa': ("lap._lapjvx_wp", "lapjvxa"),
'lapjvc': ("lap._lapjvc_wp", "lapjvc"),
'lapjvs': ("lap._lapjvs_wp", "lapjvs"),
'lapjvsa': ("lap._lapjvs_wp", "lapjvsa"),
# Batch solvers
'lapjvx_batch': ("lap._lapjvx_batch_wp", "lapjvx_batch"),
'lapjvxa_batch': ("lap._lapjvx_batch_wp", "lapjvxa_batch"),
'lapjvs_batch': ("lap._lapjvs_batch_wp", "lapjvs_batch"),
'lapjvsa_batch': ("lap._lapjvs_batch_wp", "lapjvsa_batch"),
# Constants
'LARGE': ("lap._lapjv", "LARGE_"),
'FP_1': ("lap._lapjv", "FP_1_"),
'FP_2': ("lap._lapjv", "FP_2_"),
'FP_DYNAMIC': ("lap._lapjv", "FP_DYNAMIC_"),
}
def __getattr__(name):
if name in _exports:
mod_path, attr = _exports[name]
mod = importlib.import_module(mod_path)
obj = getattr(mod, attr)
globals()[name] = obj
return obj
raise AttributeError(f"LAPX could not find attribute '{name}'.")
__version__ = '0.9.0'
__author__ = 'Ratha SIV'
__description__ = 'Linear assignment problem solvers, including single and batch solvers.'
__homepage__ = 'https://github.com/rathaROG/lapx'
__all__ = [
'lapjv', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs', 'lapjvsa',
# Single-matrix solvers
'lapmod', 'lapjv', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs', 'lapjvsa',
# Batch solvers
'lapjvx_batch', 'lapjvxa_batch', 'lapjvs_batch', 'lapjvsa_batch',
'lapmod', 'FP_1', 'FP_2', 'FP_DYNAMIC', 'LARGE'
# Constants
'FP_1', 'FP_2', 'FP_DYNAMIC', 'LARGE',
]
Metadata-Version: 2.4
Name: lapx
Version: 0.8.1
Version: 0.9.0
Summary: Linear assignment problem solvers, including single and batch solvers.

@@ -52,9 +52,10 @@ Home-page: https://github.com/rathaROG/lapx

<details><summary>🆕 What's new</summary><br>
<details><summary>🆕 What's new</summary><hr>
- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**.
- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**.
- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**.
- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support.
- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases).
<sup>- 2025/10/31: [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) delivered a major stability and performance upgrade across most solvers. 🚀 </sup><br>
<sup>- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. </sup><br>
<sup>- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. </sup><br>
<sup>- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. </sup><br>
<sup>- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. </sup><br>
<sup>- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). </sup><br>

@@ -65,17 +66,23 @@ </details>

<div align="center">
[![GitHub release](https://img.shields.io/github/release/rathaROG/lapx.svg)](https://github.com/rathaROG/lapx/releases)
[![Test Simple](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml)
[![Benchmark](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml)
[![Test PyPI Build](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml)
[![Publish to PyPI](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml)
[![Platforms](https://img.shields.io/badge/platform-windows%20%7C%20linux%20%7C%20macos-gold)](https://pypi.org/project/lapx/#files)
[![Python Versions](https://img.shields.io/pypi/pyversions/lapx.svg)](https://pypi.org/project/lapx/)
# Linear Assignment Problem Solvers
[![Benchmark (Single)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml)
[![Benchmark (Batch)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml)
[![Benchmark (Object Tracking)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml)
[`lapx`](https://github.com/rathaROG/lapx) supports all input cost types — Single ✓ Batch ✓ Square ✓ Rectangular ✓ .
# Linear Assignment Problem Solvers · 𝕏
`lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions.
**Single ✓ Batch ✓ Square ✓ Rectangular ✓**
</div>
[`lapx`](https://github.com/rathaROG/lapx) was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions.
<details><summary>Click to read more ...</summary><br>
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³.
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation ³ provided by A. Volgenant.

@@ -93,3 +100,3 @@ <sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br>

[![Wheels](https://img.shields.io/pypi/wheel/lapx)](https://pypi.org/project/lapx/)
[![PyPI version](https://badge.fury.io/py/lapx.svg)](https://badge.fury.io/py/lapx)
[![PyPI version](https://badge.fury.io/py/lapx.svg?v0.8.1)](https://badge.fury.io/py/lapx)
[![Downloads](https://static.pepy.tech/badge/lapx)](https://pepy.tech/project/lapx)

@@ -102,15 +109,4 @@ [![Downloads](https://static.pepy.tech/badge/lapx/month)](https://pepy.tech/project/lapx)

<details><summary>🛞 Pre-built wheel support</summary>
*The pre-built wheels cover most platforms and architectures, see [details](https://pypi.org/project/lapx/#files).*
| Python | **Windows** ✅ | **Linux** ✅ | **macOS** ✅ |
|:---:|:---:|:---:|:---:|
| 3.7 | AMD64 | x86_64/aarch64 | x86_64 |
| 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 |
| 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 |
<sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br>
<sup>² ⚠️ Windows ARM64 is experimental. </sup><br>
</details>
<details><summary>🛠️ Other installation options</summary>

@@ -139,2 +135,4 @@

[![Full Tests](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml)
### 🅰️ Single-matrix Solvers 📄

@@ -196,3 +194,4 @@

total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True)
assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices)))
assignments = np.column_stack((row_indices, col_indices))
# assignments = np.array(list(zip(row_indices, col_indices))) # slower
```

@@ -226,3 +225,4 @@

total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True)
assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices)))
assignments = np.column_stack((row_indices, col_indices))
# assignments = np.array(list(zip(row_indices, col_indices))) # slower
```

@@ -243,3 +243,4 @@

total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True)
assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices)))
assignments = np.column_stack((row_indices, col_indices))
# assignments = np.array(list(zip(row_indices, col_indices))) # slower
```

@@ -268,3 +269,3 @@

See the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details.
See [`lap/_lapmod_wp.py`](https://github.com/rathaROG/lapx/blob/main/lap/_lapmod_wp.py) for details.

@@ -302,7 +303,4 @@ ```python

print(f"total costs = {costs.sum()}")
# Access the assignment batch b = 7
rows_7 = rows[7] # 1D array of row indices
cols_7 = cols[7] # 1D array of col indices
assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2)
# access the assignments @ batch b = 7
assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2)
print(f"assignments_7.shape = {assignments_7.shape}")

@@ -323,4 +321,3 @@ ```

print(f"total costs = {costs.sum()}")
print(f"len(assignments) = {len(assignments)}")
print(f"assignments[0].shape = {assignments[0].shape}")
print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7
```

@@ -342,7 +339,4 @@

print(f"total costs = {costs.sum()}")
# Access the assignment batch b = 7
rows_7 = rows[7] # 1D array of row indices
cols_7 = cols[7] # 1D array of col indices
assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2)
# access the assignments @ batch b = 7
assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2)
print(f"assignments_7.shape = {assignments_7.shape}")

@@ -365,4 +359,3 @@ ```

print(f"total costs = {costs.sum()}")
print(f"len(assignments) = {len(assignments)}")
print(f"assignments[0].shape = {assignments[0].shape}")
print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7
```

@@ -378,3 +371,3 @@

[![NOTICE](https://img.shields.io/badge/NOTICE-present-blue)](https://github.com/rathaROG/lapx/blob/main/NOTICE)
[![License](https://img.shields.io/pypi/l/lapx.svg)](https://github.com/rathaROG/lapx/blob/main/LICENSE)
[![NOTICE](https://img.shields.io/badge/NOTICE-Present-blue)](https://github.com/rathaROG/lapx/blob/main/NOTICE)
[![LICENSE](https://img.shields.io/badge/LICENSE-MIT-green)](https://github.com/rathaROG/lapx/blob/main/LICENSE)

@@ -9,6 +9,9 @@ LICENSE

lap/__init__.py
lap/lapjvs.py
lap/lapjvs_batch.py
lap/lapjvx_batch.py
lap/lapmod.py
lap/_lapjv_wp.py
lap/_lapjvc_wp.py
lap/_lapjvs_batch_wp.py
lap/_lapjvs_wp.py
lap/_lapjvx_batch_wp.py
lap/_lapjvx_wp.py
lap/_lapmod_wp.py
lapx.egg-info/PKG-INFO

@@ -15,0 +18,0 @@ lapx.egg-info/SOURCES.txt

@@ -8,2 +8,3 @@ include *.md *.in LICENSE NOTICE

prune .vscode
prune benchmarks
prune tests
+41
-48
Metadata-Version: 2.4
Name: lapx
Version: 0.8.1
Version: 0.9.0
Summary: Linear assignment problem solvers, including single and batch solvers.

@@ -52,9 +52,10 @@ Home-page: https://github.com/rathaROG/lapx

<details><summary>🆕 What's new</summary><br>
<details><summary>🆕 What's new</summary><hr>
- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**.
- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**.
- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**.
- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support.
- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases).
<sup>- 2025/10/31: [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) delivered a major stability and performance upgrade across most solvers. 🚀 </sup><br>
<sup>- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. </sup><br>
<sup>- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. </sup><br>
<sup>- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. </sup><br>
<sup>- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. </sup><br>
<sup>- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). </sup><br>

@@ -65,17 +66,23 @@ </details>

<div align="center">
[![GitHub release](https://img.shields.io/github/release/rathaROG/lapx.svg)](https://github.com/rathaROG/lapx/releases)
[![Test Simple](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml)
[![Benchmark](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml)
[![Test PyPI Build](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml)
[![Publish to PyPI](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml)
[![Platforms](https://img.shields.io/badge/platform-windows%20%7C%20linux%20%7C%20macos-gold)](https://pypi.org/project/lapx/#files)
[![Python Versions](https://img.shields.io/pypi/pyversions/lapx.svg)](https://pypi.org/project/lapx/)
# Linear Assignment Problem Solvers
[![Benchmark (Single)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml)
[![Benchmark (Batch)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml)
[![Benchmark (Object Tracking)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml)
[`lapx`](https://github.com/rathaROG/lapx) supports all input cost types — Single ✓ Batch ✓ Square ✓ Rectangular ✓.
# Linear Assignment Problem Solvers · 𝕏
`lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions.
**Single ✓ Batch ✓ Square ✓ Rectangular ✓**
</div>
[`lapx`](https://github.com/rathaROG/lapx) was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions.
<details><summary>Click to read more ...</summary><br>
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³.
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation ³ provided by A. Volgenant.

@@ -93,3 +100,3 @@ <sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br>

[![Wheels](https://img.shields.io/pypi/wheel/lapx)](https://pypi.org/project/lapx/)
[![PyPI version](https://badge.fury.io/py/lapx.svg)](https://badge.fury.io/py/lapx)
[![PyPI version](https://badge.fury.io/py/lapx.svg?v0.8.1)](https://badge.fury.io/py/lapx)
[![Downloads](https://static.pepy.tech/badge/lapx)](https://pepy.tech/project/lapx)

@@ -102,15 +109,4 @@ [![Downloads](https://static.pepy.tech/badge/lapx/month)](https://pepy.tech/project/lapx)

<details><summary>🛞 Pre-built wheel support</summary>
*The pre-built wheels cover most platforms and architectures, see [details](https://pypi.org/project/lapx/#files).*
| Python | **Windows** ✅ | **Linux** ✅ | **macOS** ✅ |
|:---:|:---:|:---:|:---:|
| 3.7 | AMD64 | x86_64/aarch64 | x86_64 |
| 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 |
| 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 |
<sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br>
<sup>² ⚠️ Windows ARM64 is experimental. </sup><br>
</details>
<details><summary>🛠️ Other installation options</summary>

@@ -139,2 +135,4 @@

[![Full Tests](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml)
### 🅰️ Single-matrix Solvers 📄

@@ -196,3 +194,4 @@

total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True)
assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices)))
assignments = np.column_stack((row_indices, col_indices))
# assignments = np.array(list(zip(row_indices, col_indices))) # slower
```

@@ -226,3 +225,4 @@

total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True)
assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices)))
assignments = np.column_stack((row_indices, col_indices))
# assignments = np.array(list(zip(row_indices, col_indices))) # slower
```

@@ -243,3 +243,4 @@

total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True)
assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices)))
assignments = np.column_stack((row_indices, col_indices))
# assignments = np.array(list(zip(row_indices, col_indices))) # slower
```

@@ -268,3 +269,3 @@

See the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details.
See [`lap/_lapmod_wp.py`](https://github.com/rathaROG/lapx/blob/main/lap/_lapmod_wp.py) for details.

@@ -302,7 +303,4 @@ ```python

print(f"total costs = {costs.sum()}")
# Access the assignment batch b = 7
rows_7 = rows[7] # 1D array of row indices
cols_7 = cols[7] # 1D array of col indices
assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2)
# access the assignments @ batch b = 7
assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2)
print(f"assignments_7.shape = {assignments_7.shape}")

@@ -323,4 +321,3 @@ ```

print(f"total costs = {costs.sum()}")
print(f"len(assignments) = {len(assignments)}")
print(f"assignments[0].shape = {assignments[0].shape}")
print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7
```

@@ -342,7 +339,4 @@

print(f"total costs = {costs.sum()}")
# Access the assignment batch b = 7
rows_7 = rows[7] # 1D array of row indices
cols_7 = cols[7] # 1D array of col indices
assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2)
# access the assignments @ batch b = 7
assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2)
print(f"assignments_7.shape = {assignments_7.shape}")

@@ -365,4 +359,3 @@ ```

print(f"total costs = {costs.sum()}")
print(f"len(assignments) = {len(assignments)}")
print(f"assignments[0].shape = {assignments[0].shape}")
print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7
```

@@ -378,3 +371,3 @@

[![NOTICE](https://img.shields.io/badge/NOTICE-present-blue)](https://github.com/rathaROG/lapx/blob/main/NOTICE)
[![License](https://img.shields.io/pypi/l/lapx.svg)](https://github.com/rathaROG/lapx/blob/main/LICENSE)
[![NOTICE](https://img.shields.io/badge/NOTICE-Present-blue)](https://github.com/rathaROG/lapx/blob/main/NOTICE)
[![LICENSE](https://img.shields.io/badge/LICENSE-MIT-green)](https://github.com/rathaROG/lapx/blob/main/LICENSE)
+40
-47

@@ -1,8 +0,9 @@

<details><summary>🆕 What's new</summary><br>
<details><summary>🆕 What's new</summary><hr>
- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**.
- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**.
- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**.
- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support.
- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases).
<sup>- 2025/10/31: [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) delivered a major stability and performance upgrade across most solvers. 🚀 </sup><br>
<sup>- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. </sup><br>
<sup>- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. </sup><br>
<sup>- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. </sup><br>
<sup>- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. </sup><br>
<sup>- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). </sup><br>

@@ -13,17 +14,23 @@ </details>

<div align="center">
[![GitHub release](https://img.shields.io/github/release/rathaROG/lapx.svg)](https://github.com/rathaROG/lapx/releases)
[![Test Simple](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml)
[![Benchmark](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml)
[![Test PyPI Build](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml)
[![Publish to PyPI](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml)
[![Platforms](https://img.shields.io/badge/platform-windows%20%7C%20linux%20%7C%20macos-gold)](https://pypi.org/project/lapx/#files)
[![Python Versions](https://img.shields.io/pypi/pyversions/lapx.svg)](https://pypi.org/project/lapx/)
# Linear Assignment Problem Solvers
[![Benchmark (Single)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml)
[![Benchmark (Batch)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml)
[![Benchmark (Object Tracking)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml)
[`lapx`](https://github.com/rathaROG/lapx) supports all input cost types — Single ✓ Batch ✓ Square ✓ Rectangular ✓.
# Linear Assignment Problem Solvers · 𝕏
`lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions.
**Single ✓ Batch ✓ Square ✓ Rectangular ✓**
</div>
[`lapx`](https://github.com/rathaROG/lapx) was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions.
<details><summary>Click to read more ...</summary><br>
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³.
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation ³ provided by A. Volgenant.

@@ -41,3 +48,3 @@ <sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br>

[![Wheels](https://img.shields.io/pypi/wheel/lapx)](https://pypi.org/project/lapx/)
[![PyPI version](https://badge.fury.io/py/lapx.svg)](https://badge.fury.io/py/lapx)
[![PyPI version](https://badge.fury.io/py/lapx.svg?v0.8.1)](https://badge.fury.io/py/lapx)
[![Downloads](https://static.pepy.tech/badge/lapx)](https://pepy.tech/project/lapx)

@@ -50,15 +57,4 @@ [![Downloads](https://static.pepy.tech/badge/lapx/month)](https://pepy.tech/project/lapx)

<details><summary>🛞 Pre-built wheel support</summary>
*The pre-built wheels cover most platforms and architectures, see [details](https://pypi.org/project/lapx/#files).*
| Python | **Windows** ✅ | **Linux** ✅ | **macOS** ✅ |
|:---:|:---:|:---:|:---:|
| 3.7 | AMD64 | x86_64/aarch64 | x86_64 |
| 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 |
| 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 |
<sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br>
<sup>² ⚠️ Windows ARM64 is experimental. </sup><br>
</details>
<details><summary>🛠️ Other installation options</summary>

@@ -87,2 +83,4 @@

[![Full Tests](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml/badge.svg)](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml)
### 🅰️ Single-matrix Solvers 📄

@@ -144,3 +142,4 @@

total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True)
assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices)))
assignments = np.column_stack((row_indices, col_indices))
# assignments = np.array(list(zip(row_indices, col_indices))) # slower
```

@@ -174,3 +173,4 @@

total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True)
assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices)))
assignments = np.column_stack((row_indices, col_indices))
# assignments = np.array(list(zip(row_indices, col_indices))) # slower
```

@@ -191,3 +191,4 @@

total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True)
assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices)))
assignments = np.column_stack((row_indices, col_indices))
# assignments = np.array(list(zip(row_indices, col_indices))) # slower
```

@@ -216,3 +217,3 @@

See the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details.
See [`lap/_lapmod_wp.py`](https://github.com/rathaROG/lapx/blob/main/lap/_lapmod_wp.py) for details.

@@ -250,7 +251,4 @@ ```python

print(f"total costs = {costs.sum()}")
# Access the assignment batch b = 7
rows_7 = rows[7] # 1D array of row indices
cols_7 = cols[7] # 1D array of col indices
assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2)
# access the assignments @ batch b = 7
assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2)
print(f"assignments_7.shape = {assignments_7.shape}")

@@ -271,4 +269,3 @@ ```

print(f"total costs = {costs.sum()}")
print(f"len(assignments) = {len(assignments)}")
print(f"assignments[0].shape = {assignments[0].shape}")
print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7
```

@@ -290,7 +287,4 @@

print(f"total costs = {costs.sum()}")
# Access the assignment batch b = 7
rows_7 = rows[7] # 1D array of row indices
cols_7 = cols[7] # 1D array of col indices
assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2)
# access the assignments @ batch b = 7
assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2)
print(f"assignments_7.shape = {assignments_7.shape}")

@@ -313,4 +307,3 @@ ```

print(f"total costs = {costs.sum()}")
print(f"len(assignments) = {len(assignments)}")
print(f"assignments[0].shape = {assignments[0].shape}")
print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7
```

@@ -326,3 +319,3 @@

[![NOTICE](https://img.shields.io/badge/NOTICE-present-blue)](https://github.com/rathaROG/lapx/blob/main/NOTICE)
[![License](https://img.shields.io/pypi/l/lapx.svg)](https://github.com/rathaROG/lapx/blob/main/LICENSE)
[![NOTICE](https://img.shields.io/badge/NOTICE-Present-blue)](https://github.com/rathaROG/lapx/blob/main/NOTICE)
[![LICENSE](https://img.shields.io/badge/LICENSE-MIT-green)](https://github.com/rathaROG/lapx/blob/main/LICENSE)

@@ -46,72 +46,130 @@ # Tomas Kazmar, 2012-2017, BSD 2-clause license, see LICENSE.

double cost_limit=np.inf, char return_cost=True):
"""Solve linear assignment problem using Jonker-Volgenant algorithm.
"""
Solve the Linear Assignment Problem using the Jonker-Volgenant (JV) algorithm.
Orientation is normalized internally for performance: the native kernel
always sees rows <= cols by transposing when N_rows > N_cols, and results
are mapped back to the original (row, col) orientation before returning.
Parameters
----------
cost: (N,M) ndarray
Cost matrix. Entry `cost[i, j]` is the cost of assigning row `i` to
column `j`.
extend_cost: bool, optional
Whether or not extend a non-square matrix. Default: False.
cost_limit: double, optional
An upper limit for a cost of a single assignment. Default: `np.inf`.
return_cost: bool, optional
Whether or not to return the assignment cost.
cost : (N, M) ndarray
Cost matrix. Entry cost[i, j] is the cost of assigning row i to column j.
Any float dtype is accepted; a contiguous float64 working buffer is used only if needed.
extend_cost : bool, optional (default: False)
Whether to permit non-square inputs via zero-padding to a square matrix.
See the unified augmentation policy below.
cost_limit : float, optional (default: np.inf)
If finite, augment to an (N+M) x (N+M) matrix with sentinel costs
cost_limit/2 and a 0 block in the bottom-right. This models per-edge
reject costs and allows rectangular inputs even when extend_cost=False.
return_cost : bool, optional (default: True)
Whether to return the total assignment cost as the first return value.
Returns
-------
opt: double
Assignment cost. Not returned if `return_cost is False`.
x: (N,) ndarray
Assignment. `x[i]` specifies the column to which row `i` is assigned.
y: (N,) ndarray
Assignment. `y[j]` specifies the row to which column `j` is assigned.
opt : float
Total assignment cost (only if return_cost=True). The total is computed
from the ORIGINAL input array shape (N, M), not the padded/augmented one.
x : (N,) ndarray of int32
x[i] = assigned column index for row i, or -1 if unassigned.
y : (M,) ndarray of int32
y[j] = assigned row index for column j, or -1 if unassigned.
Unified augmentation policy
---------------------------
- If cost_limit < inf: always augment to (N+M) to model per-edge rejects.
Rectangular inputs are allowed regardless of extend_cost.
- Else if (N != M) or extend_cost=True: zero-pad to a square of size max(N, M).
Rectangular inputs are allowed when extend_cost=True.
- Else (square, un-augmented): run on the given square matrix.
Notes
-----
For non-square matrices (with `extend_cost is True`) or `cost_limit` set
low enough, there will be unmatched rows, columns in the solution `x`, `y`.
All such entries are set to -1.
- Single contiguous working buffer: we reuse the input when it's already
float64 C-contiguous and no transpose is needed; otherwise we materialize
exactly one contiguous float64 working array (or its transpose).
- For zero-sized dimensions, the solver returns 0.0 (if requested) and
all -1 mappings with appropriate lengths.
"""
if cost.ndim != 2:
raise ValueError('2-dimensional array expected')
cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = \
np.ascontiguousarray(cost, dtype=np.double)
# Original input for final total computation (no copy unless necessary for transpose below)
A = np.asarray(cost)
cdef Py_ssize_t n_rows0 = A.shape[0]
cdef Py_ssize_t n_cols0 = A.shape[1]
# Fast exits for empty dimensions
if n_rows0 == 0 or n_cols0 == 0:
if return_cost:
return 0.0, np.full((n_rows0,), -1, dtype=np.int32), np.full((n_cols0,), -1, dtype=np.int32)
else:
return np.full((n_rows0,), -1, dtype=np.int32), np.full((n_cols0,), -1, dtype=np.int32)
# Normalize orientation: kernel sees rows <= cols
cdef bint transposed = False
cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] B
if n_rows0 > n_cols0:
B = np.ascontiguousarray(A.T, dtype=np.double) # single working buffer (transposed)
transposed = True
else:
if A.dtype == np.float64 and A.flags['C_CONTIGUOUS']:
# reuse input buffer directly
B = A
else:
B = np.ascontiguousarray(A, dtype=np.double) # single working buffer
cdef Py_ssize_t R = B.shape[0] # working rows (<= cols)
cdef Py_ssize_t C = B.shape[1] # working cols
# Permit rectangular when cost_limit < inf (augment) or extend_cost=True (zero-pad); otherwise require square
if R != C and (not extend_cost) and cost_limit == np.inf:
raise ValueError(
'Square cost array expected. If cost is intentionally '
'non-square, pass extend_cost=True or set a finite cost_limit.'
)
cdef uint_t N
cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = B
cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c_extended
cdef uint_t n_rows = cost_c.shape[0]
cdef uint_t n_cols = cost_c.shape[1]
cdef uint_t n = 0
if n_rows == n_cols:
n = n_rows
else:
if not extend_cost:
raise ValueError(
'Square cost array expected. If cost is intentionally '
'non-square, pass extend_cost=True.')
if cost_limit < np.inf:
n = n_rows + n_cols
cost_c_extended = np.empty((n, n), dtype=np.double)
cost_c_extended[:] = cost_limit / 2.
cost_c_extended[n_rows:, n_cols:] = 0
cost_c_extended[:n_rows, :n_cols] = cost_c
# Augment to (R+C)x(R+C) with sentinel edges
N = <uint_t>(R + C)
cost_c_extended = np.empty((R + C, R + C), dtype=np.double)
cost_c_extended[:] = cost_limit / 2.0
cost_c_extended[R:, C:] = 0.0
cost_c_extended[:R, :C] = cost_c
cost_c = cost_c_extended
elif extend_cost:
n = max(n_rows, n_cols)
cost_c_extended = np.zeros((n, n), dtype=np.double)
cost_c_extended[:n_rows, :n_cols] = cost_c
cost_c = cost_c_extended
elif R != C or extend_cost:
# Zero-pad to square max(R, C); if already square and extend_cost=True, keep as-is
N = <uint_t>max(R, C)
if R != C:
cost_c_extended = np.zeros((N, N), dtype=np.double)
cost_c_extended[:R, :C] = cost_c
cost_c = cost_c_extended
else:
N = <uint_t>R
else:
# Square, un-augmented
N = <uint_t>R
cdef double **cost_ptr
cost_ptr = <double **> malloc(n * sizeof(double *))
# Build row-pointer view for kernel
cdef double **cost_ptr = <double **> malloc(N * sizeof(double *))
if cost_ptr == NULL:
raise MemoryError('Out of memory.')
cdef int i
for i in range(n):
for i in range(N):
cost_ptr[i] = &cost_c[i, 0]
cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = \
np.empty((n,), dtype=np.int32)
cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = \
np.empty((n,), dtype=np.int32)
# Outputs for kernel space (size N)
cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((N,), dtype=np.int32)
cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((N,), dtype=np.int32)
cdef int ret = lapjv_internal(n, cost_ptr, &x_c[0], &y_c[0])
cdef int ret
with nogil:
ret = lapjv_internal(N, cost_ptr, &x_c[0], &y_c[0])
free(cost_ptr)
if ret != 0:

@@ -122,17 +180,52 @@ if ret == -1:

cdef double opt = np.nan
if cost_limit < np.inf or extend_cost:
x_c[x_c >= n_cols] = -1
y_c[y_c >= n_rows] = -1
x_c = x_c[:n_rows]
y_c = y_c[:n_cols]
if return_cost:
opt = cost_c[np.nonzero(x_c != -1)[0], x_c[x_c != -1]].sum()
elif return_cost:
opt = cost_c[np.arange(n_rows), x_c].sum()
# Trim to working rectangle (B space) and clean artificial matches
cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_trim
cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_trim
if cost_limit < np.inf or (R != C or extend_cost):
x_c[x_c >= C] = -1
y_c[y_c >= R] = -1
x_trim = x_c[:R]
y_trim = y_c[:C]
else:
x_trim = x_c[:R]
y_trim = y_c[:C]
# Map to ORIGINAL orientation (A space) as vectors x_out (N_rows0), y_out (N_cols0)
x_out = np.full((n_rows0,), -1, dtype=np.int32)
y_out = np.full((n_cols0,), -1, dtype=np.int32)
if not transposed:
mask = (x_trim >= 0)
if np.any(mask):
rows = np.nonzero(mask)[0]
cols = x_trim[mask]
x_out[rows] = cols
y_out[cols] = rows
else:
# B rows are A columns; B cols are A rows
mask = (x_trim >= 0)
if np.any(mask):
rows_b = np.nonzero(mask)[0] # indices in B rows -> A columns
cols_b = x_trim[mask] # indices in B cols -> A rows
row_A = cols_b
col_A = rows_b
x_out[row_A] = col_A
y_out[col_A] = row_A
# Total from ORIGINAL A
cdef double opt = 0.0
if return_cost:
return opt, x_c, y_c
mcost = (x_out >= 0)
if np.any(mcost):
rr = np.nonzero(mcost)[0]
cc = x_out[mcost]
opt = float(A[rr, cc].sum())
else:
opt = 0.0
if return_cost:
return opt, x_out, y_out
else:
return x_c, y_c
return x_out, y_out

@@ -139,0 +232,0 @@

@@ -40,63 +40,92 @@ # _lapjvx.pyx | Wrote on 2025/10/16 by rathaROG

Parameters
----------
cost: (N, M) ndarray
Cost matrix. Entry cost[i, j] is the cost of assigning row i to column j.
extend_cost: bool, optional
Whether or not to extend a non-square matrix. Default: False.
cost_limit: double, optional
An upper limit for a cost of a single assignment. Default: np.inf.
return_cost: bool, optional
Whether or not to return the assignment cost.
Orientation is normalized internally (kernel sees rows <= cols) and results
are mapped back to the original orientation.
Unified augmentation policy
---------------------------
- If cost_limit < inf: augment to (N+M) with cost_limit/2 sentinels (rectangular allowed).
- Elif (N != M) or extend_cost: zero-pad to square max(N, M) (rectangular allowed when extend_cost=True).
- Else (square, un-augmented): run on the given square.
Returns
-------
opt: double
Assignment cost. Not returned if return_cost is False.
row_indices: (K,) ndarray
Indices of assigned rows.
col_indices: (K,) ndarray
Indices of assigned columns.
opt : float
Total cost (if return_cost=True), computed on the ORIGINAL input (not padded).
row_indices : (K,) ndarray (np.where -> int64)
col_indices : (K,) ndarray (sliced from x_c -> int32)
"""
if cost.ndim != 2:
raise ValueError('2-dimensional array expected')
cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = \
np.ascontiguousarray(cost, dtype=np.double)
# Original input for final total (no copy unless needed for transpose)
A = np.asarray(cost)
cdef Py_ssize_t n_rows0 = A.shape[0]
cdef Py_ssize_t n_cols0 = A.shape[1]
# Fast exits for empty dims
if n_rows0 == 0 or n_cols0 == 0:
if return_cost:
return 0.0, np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64)
else:
return np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64)
# Normalize orientation: kernel sees rows <= cols
cdef bint transposed = False
cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] B
if n_rows0 > n_cols0:
B = np.ascontiguousarray(A.T, dtype=np.double) # single working buffer (transposed)
transposed = True
else:
if A.dtype == np.float64 and A.flags['C_CONTIGUOUS']:
B = A # reuse input buffer
else:
B = np.ascontiguousarray(A, dtype=np.double) # single working buffer
cdef Py_ssize_t R = B.shape[0]
cdef Py_ssize_t C = B.shape[1]
# Gate: rectangular error only if extend_cost=False and cost_limit==inf
if R != C and (not extend_cost) and cost_limit == np.inf:
raise ValueError(
'Square cost array expected. If cost is intentionally '
'non-square, pass extend_cost=True.'
)
cdef uint_t N
cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = B
cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c_extended
cdef uint_t n_rows = cost_c.shape[0]
cdef uint_t n_cols = cost_c.shape[1]
cdef uint_t n = 0
if n_rows == n_cols:
n = n_rows
else:
if not extend_cost:
raise ValueError(
'Square cost array expected. If cost is intentionally '
'non-square, pass extend_cost=True.')
if cost_limit < np.inf:
n = n_rows + n_cols
cost_c_extended = np.empty((n, n), dtype=np.double)
cost_c_extended[:] = cost_limit / 2.
cost_c_extended[n_rows:, n_cols:] = 0
cost_c_extended[:n_rows, :n_cols] = cost_c
N = <uint_t>(R + C)
cost_c_extended = np.empty((R + C, R + C), dtype=np.double)
cost_c_extended[:] = cost_limit / 2.0
cost_c_extended[R:, C:] = 0.0
cost_c_extended[:R, :C] = cost_c
cost_c = cost_c_extended
elif extend_cost:
n = max(n_rows, n_cols)
cost_c_extended = np.zeros((n, n), dtype=np.double)
cost_c_extended[:n_rows, :n_cols] = cost_c
cost_c = cost_c_extended
elif R != C or extend_cost:
N = <uint_t>max(R, C)
if R != C:
cost_c_extended = np.zeros((N, N), dtype=np.double)
cost_c_extended[:R, :C] = cost_c
cost_c = cost_c_extended
else:
N = <uint_t>R
else:
N = <uint_t>R
cdef double **cost_ptr
cost_ptr = <double **> malloc(n * sizeof(double *))
# Build row-pointer view
cdef double **cost_ptr = <double **> malloc(N * sizeof(double *))
if cost_ptr == NULL:
raise MemoryError('Out of memory when allocating cost_ptr')
cdef int i
for i in range(n):
for i in range(N):
cost_ptr[i] = &cost_c[i, 0]
# Allocate x/y, build cost_ptr as before
cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((n,), dtype=np.int32)
cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((n,), dtype=np.int32)
# Allocate x/y
cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((N,), dtype=np.int32)
cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((N,), dtype=np.int32)
cdef int ret
with nogil:
ret = lapjv_internal(<uint_t> n, cost_ptr, &x_c[0], &y_c[0])
ret = lapjv_internal(<uint_t> N, cost_ptr, &x_c[0], &y_c[0])

@@ -109,19 +138,34 @@ free(cost_ptr)

cdef double opt = np.nan
if cost_limit < np.inf or extend_cost:
x_c[x_c >= n_cols] = -1
y_c[y_c >= n_rows] = -1
x_c = x_c[:n_rows]
y_c = y_c[:n_cols]
if return_cost:
opt = cost_c[np.nonzero(x_c != -1)[0], x_c[x_c != -1]].sum()
elif return_cost:
opt = cost_c[np.arange(n_rows), x_c].sum()
# Trim to working rectangle (B-space) and clean artificial matches
cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_trim
cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_trim
# Construct row_indices, col_indices - only for assigned rows
assigned_mask = x_c >= 0
row_indices = np.where(assigned_mask)[0]
col_indices = x_c[assigned_mask]
if cost_limit < np.inf or (R != C or extend_cost):
x_c[x_c >= C] = -1
y_c[y_c >= R] = -1
x_trim = x_c[:R]
y_trim = y_c[:C]
else:
x_trim = x_c[:R]
y_trim = y_c[:C]
# Build rows/cols in ORIGINAL orientation (A-space)
rows_b = np.nonzero(x_trim >= 0)[0].astype(np.int64, copy=False)
cols_b = x_trim[x_trim >= 0] # keep int32 dtype
if transposed:
row_indices = cols_b
col_indices = rows_b
else:
row_indices = rows_b
col_indices = cols_b
cdef double opt = 0.0
if return_cost:
if row_indices.size:
opt = float(A[row_indices, col_indices].sum())
else:
opt = 0.0
if return_cost:
return opt, row_indices, col_indices

@@ -140,12 +184,5 @@ else:

"""
Like lapjvx, but returns assignment pairs as a (K,2) np.ndarray of (row, col).
Returns
-------
opt : double
Assignment cost. Not returned if return_cost is False.
assignments : (K, 2) ndarray
Each row is (row_index, col_index).
Like lapjvx, but returns assignment pairs as a (K,2) ndarray of (row, col).
Uses int32 pairs to match legacy behavior.
"""
# We call lapjvx to get the indices
if return_cost:

@@ -152,0 +189,0 @@ opt, row_indices, col_indices = lapjvx(cost, extend_cost=extend_cost,

@@ -7,2 +7,3 @@ #include <pybind11/pybind11.h>

#include <limits>
#include <type_traits>

@@ -12,7 +13,11 @@ #include "dense.hpp"

/**
Updated on 2025/10/16 by rathaROG
Updated by rathaROG:
The return value is changed to include total_cost as the first element of the tuple.
This is to avoid recalculating the total cost in Python, which can be inefficient for
large matrices.
2025/10/30: Improve speed almost 2x for rectangular matrices by using ZERO padding
for dummy rows/columns instead of LARGE_COST padding. This avoids filling the entire
padded area with LARGE_COST and speeds up rectangular cases significantly.
2025/10/16: The return value is changed to include total_cost as the first element of
the tuple. This is to avoid recalculating the total cost in Python, which can be
inefficient for large matrices.
*/

@@ -42,10 +47,10 @@

bool any_finite = false;
T max_abs_cost = 0;
for(int i = 0; i < nrows*ncols; ++i) {
if (std::isfinite((double)data[i])) {
double max_abs_cost_d = 0.0;
for (int i = 0; i < nrows * ncols; ++i) {
// We cast to double for the finiteness check. For integer T this is always finite.
double dv = static_cast<double>(data[i]);
if (std::isfinite(dv)) {
any_finite = true;
// Careful: Note that std::abs() is not a template.
// https://en.cppreference.com/w/cpp/numeric/math/abs
// https://en.cppreference.com/w/cpp/numeric/math/fabs
max_abs_cost = std::max<T>(max_abs_cost, std::abs(data[i]));
// Use fabs on double to avoid template pitfalls and integer overflow on abs(INT_MIN).
max_abs_cost_d = std::max(max_abs_cost_d, std::fabs(dv));
}

@@ -63,15 +68,33 @@ }

const int n = std::max<int>(nrows, ncols);
const T LARGE_COST = 2 * r * max_abs_cost + 1;
std::vector<std::vector<T>> costs(n, std::vector<T>(n, LARGE_COST));
for (int i = 0; i < nrows; i++)
{
T *cptr = data + i*ncols;
for (int j = 0; j < ncols; j++)
{
// Compute a LARGE_COST sentinel for forbidden entries within the original rectangle.
// Use double to compute and clamp if T is integral to avoid overflow.
double large_cost_d = 2.0 * static_cast<double>(r) * max_abs_cost_d + 1.0;
T LARGE_COST;
if constexpr (std::is_floating_point<T>::value) {
LARGE_COST = static_cast<T>(large_cost_d);
} else {
// Clamp to a safe large value for integer types.
using Lim = std::numeric_limits<T>;
double cap = std::min<double>(static_cast<double>(Lim::max()) / 2.0, large_cost_d);
LARGE_COST = static_cast<T>(cap);
}
// Build square cost matrix with ZERO padding for dummy rows/columns (fast for rectangular).
// Only set LARGE_COST for forbidden entries inside the original MxN region.
std::vector<std::vector<T>> costs(n, std::vector<T>(n, T(0)));
for (int i = 0; i < nrows; ++i) {
T *cptr = data + i * ncols;
for (int j = 0; j < ncols; ++j) {
const T c = cptr[j];
if (std::isfinite((double)c))
costs[i][j] = c;
// For floats: non-finite => forbidden. For integers: always finite => allowed.
bool finite = std::isfinite(static_cast<double>(c));
costs[i][j] = finite ? c : LARGE_COST;
}
}
// Note:
// - If nrows < ncols, rows [nrows..n-1] remain zero => dummy rows.
// - If ncols < nrows, cols [ncols..n-1] remain zero => dummy columns.
// This avoids filling the entire padded area with LARGE_COST and speeds up rectangular cases.

@@ -82,12 +105,11 @@ std::vector<int> Lmate, Rmate;

std::vector<int> rowids, colids;
T total_cost = 0; // NEW: accumulate total assignment cost
T total_cost = T(0);
for (int i = 0; i < nrows; i++)
{
// Collect only real (row, col) matches. Exclude dummy columns (j >= ncols) and forbidden.
for (int i = 0; i < nrows; ++i) {
int mate = Lmate[i];
if (Lmate[i] < ncols && costs[i][mate] != LARGE_COST)
{
if (mate >= 0 && mate < ncols && costs[i][mate] != LARGE_COST) {
rowids.push_back(i);
colids.push_back(mate);
total_cost += costs[i][mate]; // Sum up the cost for each assigned pair
total_cost = static_cast<T>(static_cast<double>(total_cost) + static_cast<double>(costs[i][mate]));
}

@@ -97,6 +119,8 @@ }

if (return_cost)
return py::make_tuple(total_cost, py::array(rowids.size(), rowids.data()), py::array(colids.size(), colids.data()));
return py::make_tuple(total_cost,
py::array(rowids.size(), rowids.data()),
py::array(colids.size(), colids.data()));
else
return py::make_tuple(py::array(rowids.size(), rowids.data()), py::array(colids.size(), colids.data()));
return py::make_tuple(py::array(rowids.size(), rowids.data()),
py::array(colids.size(), colids.data()));
}
# Copyright (c) 2025 Ratha SIV | MIT License
import os
import numpy as np
from typing import List, Tuple, Union
from concurrent.futures import ThreadPoolExecutor, as_completed
from .lapjvs import lapjvs as _lapjvs_single
from .lapjvs import lapjvsa as _lapjvsa_single
def _normalize_threads(n_threads: int) -> int:
if n_threads is None or n_threads == 0:
cpu = os.cpu_count() or 1
return max(1, int(cpu))
return max(1, int(n_threads))
def _solve_one_jvs(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    jvx_like: bool,
    return_cost: bool,
):
    """Solve a single 2D cost matrix via the single-instance JVS wrapper.

    The wrapped extension releases the GIL internally, which is what makes
    fanning these calls out over a thread pool worthwhile.
    """
    options = dict(
        extend_cost=extend_cost,
        return_cost=return_cost,
        jvx_like=jvx_like,
        prefer_float32=prefer_float32,
    )
    return _lapjvs_single(a2d, **options)
def _solve_one_jvsa(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    return_cost: bool,
):
    """Solve a single 2D cost matrix via the single-instance pairs-API wrapper."""
    options = dict(
        extend_cost=extend_cost,
        return_cost=return_cost,
        prefer_float32=prefer_float32,
    )
    return _lapjvsa_single(a2d, **options)
def lapjvs_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """
    Stable batched JVS built on the single-instance solver with a thread pool.

    Parameters
    ----------
    costs : array-like, shape (B, N, M)
        Stack of B cost matrices (float32/float64).
    extend_cost : bool, default False
        Pad rectangular inputs to square (forwarded to the solver).
    return_cost : bool, default True
        If True, return the per-item totals (B,) as the first value.
    n_threads : int, default 0
        0 -> os.cpu_count(); otherwise the exact number of threads.
    prefer_float32 : bool, default True
        Forwarded to the single-instance kernel.

    Returns
    -------
    if return_cost:
        (totals: (B,) float64, rows_list: List[(K_b,) int64], cols_list: List[(K_b,) int64])
    else:
        (rows_list, cols_list)

    Raises
    ------
    ValueError
        If `costs` is not 3-dimensional.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]  # N, M were unpacked but never used; dropped.
    threads = _normalize_threads(n_threads)
    # Preallocate outputs; slots are filled by index so completion order is irrelevant.
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one batch item; returns (index, total-or-None, rows, cols).
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _solve_one_jvs(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
                jvx_like=True, return_cost=True
            )
            return bi, total, rows, cols
        rows, cols = _solve_one_jvs(
            a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
            jvx_like=True, return_cost=False
        )
        return bi, None, rows, cols

    def _store(idx: int, t, r, c) -> None:
        # Single result-store path shared by serial and parallel runs
        # (previously duplicated in both branches).
        if return_cost:
            totals[idx] = float(t)  # type: ignore
        rows_list[idx] = np.asarray(r, dtype=np.int64)
        cols_list[idx] = np.asarray(c, dtype=np.int64)

    if threads == 1 or B == 1:
        # Serial path (still GIL-free inside the solver).
        for bi in range(B):
            _store(*work(bi))
    else:
        # Parallel path (ThreadPool is OK because the solver releases the GIL).
        with ThreadPoolExecutor(max_workers=threads) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                _store(*fut.result())
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    return rows_list, cols_list
def lapjvsa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """
    Stable batched JVS (pairs API) built on the single-instance wrapper
    with a thread pool.

    Returns:
        if return_cost:
            (totals: (B,), pairs_list: List[(K_b, 2)])
        else:
            (pairs_list,)
    """
    stack = np.asarray(costs)
    if stack.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    batch = stack.shape[0]
    workers = _normalize_threads(n_threads)
    # Index-addressed output slots: completion order never matters.
    totals = np.empty((batch,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * batch  # type: ignore

    def solve(idx: int):
        mat = stack[idx]
        if return_cost:
            total, pairs = _solve_one_jvsa(
                mat, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=True
            )
            return idx, total, pairs
        pairs = _solve_one_jvsa(
            mat, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=False
        )
        return idx, None, pairs

    def record(idx: int, total, pairs) -> None:
        # Store one solved item into its preallocated slot.
        if return_cost:
            totals[idx] = float(total)  # type: ignore
        pairs_list[idx] = np.asarray(pairs, dtype=np.int64)

    if workers == 1 or batch == 1:
        for bi in range(batch):
            record(*solve(bi))
    else:
        with ThreadPoolExecutor(max_workers=workers) as pool:
            pending = [pool.submit(solve, bi) for bi in range(batch)]
            for fut in as_completed(pending):
                record(*fut.result())
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
# Copyright (c) 2025 Ratha SIV | MIT License
import numpy as np
from typing import Optional, Tuple, Union
from ._lapjvs import lapjvs_native as _lapjvs_native
from ._lapjvs import lapjvs_float32 as _lapjvs_float32
from ._lapjvs import lapjvsa_native as _lapjvsa_native
from ._lapjvs import lapjvsa_float32 as _lapjvsa_float32
def lapjvs(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    jvx_like: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray]
]:
    """
    Solve the Linear Assignment Problem using the 'lapjvs' algorithm.

    - Accepts rectangular cost matrices (zero-pads to square internally).
    - Returns jvx-like (rows, cols) or lapjv-like (x, y) outputs.
    - Optional prefer_float32 to reduce memory bandwidth.

    Parameters
    ----------
    cost : np.ndarray
        Cost matrix of shape (n, m). dtype should be a floating type.
    extend_cost : Optional[bool]
        If True, treat rectangular inputs by extending (zero-padding) to square.
        If False, require square input.
        If None (default), auto-detect: extend if n != m.
    return_cost : bool
        If True (default), return total cost as the first element of the return tuple.
    jvx_like : bool
        If True (default), return lapjvx/SciPy-style index arrays:
        - return_cost=True  -> (total_cost, row_indices, col_indices)
        - return_cost=False -> (row_indices, col_indices)
        If False, return lapjv-style mapping vectors:
        - return_cost=True  -> (total_cost, x, y)
        - return_cost=False -> (x, y)
    prefer_float32 : bool
        If True (default), run the solver kernel in float32; only when set to
        False AND the input is float64 is the native float64 kernel used.

    Notes
    -----
    - The solver kernel may run in float32 (when prefer_float32=True) or native
      float64, but the returned total cost is always recomputed from the
      ORIGINAL input array to preserve previous numeric behavior and parity
      with lapjv/lapjvx.
    - For rectangular inputs, zero-padding exactly models the rectangular LAP.
    """
    # Keep the original array to compute the final cost from it (preserves previous behavior)
    a = np.asarray(cost)
    if a.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n, m = a.shape
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    # Choose backend and working dtype for the solver only (ensure contiguity to avoid hidden copies)
    use_float32_kernel = not ((prefer_float32 is False) and (a.dtype == np.float64))
    if use_float32_kernel:
        # Run float32 kernel (casting as needed)
        _kernel = _lapjvs_float32
        work = np.ascontiguousarray(a, dtype=np.float32)
    else:
        # Run native kernel on float64 inputs
        _kernel = _lapjvs_native
        work = np.ascontiguousarray(a, dtype=np.float64)

    def _rows_cols_from_x(x_vec: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        # Convert an x-mapping (row -> col, -1 = unassigned) into
        # parallel (rows, cols) int64 arrays of assigned pairs only.
        if x_vec.size == 0:
            return np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64)
        mask = x_vec >= 0
        rows = np.nonzero(mask)[0]
        cols = x_vec[mask]
        return rows.astype(np.int64, copy=False), cols.astype(np.int64, copy=False)

    if not extend:
        # Square: call solver directly on chosen dtype, but compute total from ORIGINAL array
        x_raw_obj, y_raw_obj = _kernel(work)
        # Convert only what's needed; y conversion deferred if not used
        x_raw = np.asarray(x_raw_obj, dtype=np.int64)
        if jvx_like:
            rows, cols = _rows_cols_from_x(x_raw)
            if return_cost:
                # NOTE(review): fancy-indexing with x_raw assumes a complete
                # assignment (no -1 entries) on the square path — confirm the
                # kernel guarantees this for square inputs.
                total = float(a[np.arange(n), x_raw].sum()) if n > 0 else 0.0
                return total, rows, cols
            else:
                return rows, cols
        else:
            y_raw = np.asarray(y_raw_obj, dtype=np.int64)
            if return_cost:
                total = float(a[np.arange(n), x_raw].sum()) if n > 0 else 0.0
                return total, x_raw, y_raw
            else:
                return x_raw, y_raw
    # Rectangular: zero-pad to square, solve, then trim back; compute total from ORIGINAL array
    size = max(n, m)
    padded = np.empty((size, size), dtype=work.dtype)
    padded[:n, :m] = work
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    x_pad_obj, y_pad_obj = _kernel(padded)
    x_pad = np.asarray(x_pad_obj, dtype=np.int64)
    # Only the first n rows of the padded solution correspond to real rows.
    cols_pad_n = x_pad[:n]
    if jvx_like:
        if n == 0 or m == 0:
            rows = np.empty((0,), dtype=np.int64)
            cols = np.empty((0,), dtype=np.int64)
            total = 0.0
        else:
            # Keep only matches that land inside the original n x m rectangle.
            mask_r = (cols_pad_n >= 0) & (cols_pad_n < m)
            rows = np.nonzero(mask_r)[0].astype(np.int64, copy=False)
            cols = cols_pad_n[mask_r].astype(np.int64, copy=False)
            total = float(a[rows, cols].sum()) if (return_cost and rows.size) else 0.0
        return (total, rows, cols) if return_cost else (rows, cols)
    # lapjv-like outputs (vectorized)
    x_out = np.full(n, -1, dtype=np.int64)
    mask_r = (cols_pad_n >= 0) & (cols_pad_n < m)
    if mask_r.any():
        x_out[mask_r] = cols_pad_n[mask_r]
    y_pad = np.asarray(y_pad_obj, dtype=np.int64)
    rows_pad_m = y_pad[:m]
    y_out = np.full(m, -1, dtype=np.int64)
    mask_c = (rows_pad_m >= 0) & (rows_pad_m < n)
    if mask_c.any():
        y_out[mask_c] = rows_pad_m[mask_c]
    if return_cost and n > 0 and m > 0:
        mask = (x_out >= 0)
        total = float(a[np.nonzero(mask)[0], x_out[mask]].sum()) if mask.any() else 0.0
    else:
        total = 0.0
    return (total, x_out, y_out) if return_cost else (x_out, y_out)
def lapjvsa(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray],
    np.ndarray
]:
    """
    Solve LAP using the 'lapjvs' algorithm (pairs API).
    - Accepts rectangular cost matrices (pads to square internally when needed).
    - Returns pairs array (K, 2) int64 with optional total cost (computed from ORIGINAL input).
    """
    arr = np.asarray(cost)
    if arr.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n_rows, n_cols = arr.shape
    do_extend = bool(extend_cost) if extend_cost is not None else (n_rows != n_cols)
    # Float64 native kernel only when explicitly requested AND the input is float64.
    f32_kernel = not ((prefer_float32 is False) and (arr.dtype == np.float64))

    if not do_extend:
        # Square: feed the matrix straight to the pairs entry point.
        if f32_kernel:
            raw_pairs = _lapjvsa_float32(np.ascontiguousarray(arr, dtype=np.float32))
        else:
            # Follow native dtype; anything non-float falls back to float64.
            native_dtype = arr.dtype if arr.dtype in (np.float32, np.float64) else np.float64
            raw_pairs = _lapjvsa_native(np.ascontiguousarray(arr, dtype=native_dtype))
        pairs = np.asarray(raw_pairs, dtype=np.int64)
        if not return_cost:
            return pairs
        # Total is always evaluated on the ORIGINAL input array.
        total = float(arr[pairs[:, 0], pairs[:, 1]].sum()) if pairs.size else 0.0
        return total, pairs

    # Rectangular: zero-pad to square, solve, then trim back to the original rectangle.
    side = max(n_rows, n_cols)
    if f32_kernel:
        work_dtype = np.float32
    else:
        work_dtype = arr.dtype if arr.dtype in (np.float32, np.float64) else np.float64
    padded = np.empty((side, side), dtype=work_dtype)
    padded[:n_rows, :n_cols] = arr.astype(work_dtype, copy=False)
    if n_cols < side:
        padded[:n_rows, n_cols:] = 0
    if n_rows < side:
        padded[n_rows:, :] = 0
    solver = _lapjvsa_float32 if f32_kernel else _lapjvsa_native
    pairs_pad = np.asarray(solver(padded), dtype=np.int64)

    # Default to "no matches"; overwrite below when real matches survive trimming.
    pairs = np.empty((0, 2), dtype=np.int64)
    total = 0.0
    if pairs_pad.size and n_rows and n_cols:
        rr = pairs_pad[:, 0]
        cc = pairs_pad[:, 1]
        keep = (rr >= 0) & (rr < n_rows) & (cc >= 0) & (cc < n_cols)
        if keep.any():
            pairs = np.stack([rr[keep], cc[keep]], axis=1).astype(np.int64, copy=False)
            if return_cost:
                total = float(arr[pairs[:, 0], pairs[:, 1]].sum())
    return (total, pairs) if return_cost else pairs
# Copyright (c) 2025 Ratha SIV | MIT License
import os
import numpy as np
from typing import List, Tuple, Union
from concurrent.futures import ThreadPoolExecutor, as_completed
from ._lapjvx import lapjvx as _lapjvx_single
from ._lapjvx import lapjvxa as _lapjvxa_single
def _normalize_threads(n_threads: int) -> int:
if not n_threads:
return max(1, int(os.cpu_count() or 1))
return max(1, int(n_threads))
def lapjvx_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
           Tuple[List[np.ndarray], List[np.ndarray]]]:
    """Batched lapjvx over a stack of cost matrices using a thread pool.

    Parameters
    ----------
    costs : array-like, shape (B, N, M)
        Stack of B cost matrices.
    extend_cost : bool, default False
        Forwarded to the single-instance solver (pad rectangular inputs).
    cost_limit : float, default np.inf
        Forwarded to the single-instance solver.
    return_cost : bool, default True
        If True, per-item totals (B,) are returned first.
    n_threads : int, default 0
        0 -> os.cpu_count(); otherwise the exact number of threads.

    Returns
    -------
    (totals, rows_list, cols_list) if return_cost else (rows_list, cols_list),
    where rows_list/cols_list hold one int64 index array per batch item.

    Raises
    ------
    ValueError
        If `costs` is not 3-dimensional.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    # Preallocated, index-addressed slots: safe to fill in any completion order.
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one batch item; returns (index, total-or-None, rows, cols).
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _lapjvx_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, rows, cols
        else:
            rows, cols = _lapjvx_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
            )
            return bi, None, rows, cols

    if threads == 1 or B == 1:
        # Serial path: no pool overhead for trivial batches.
        for bi in range(B):
            i, t, r, c = work(bi)
            if return_cost:
                totals[i] = float(t)  # type: ignore
            rows_list[i] = np.asarray(r, dtype=np.int64)
            cols_list[i] = np.asarray(c, dtype=np.int64)
    else:
        # Thread pool; presumably the extension releases the GIL — as done in
        # the sibling JVS batch helpers (TODO confirm for _lapjvx).
        with ThreadPoolExecutor(max_workers=threads) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, r, c = fut.result()
                if return_cost:
                    totals[i] = float(t)  # type: ignore
                rows_list[i] = np.asarray(r, dtype=np.int64)
                cols_list[i] = np.asarray(c, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    return rows_list, cols_list
def lapjvxa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """Batched lapjvxa (pairs API) over a stack of cost matrices.

    Parameters mirror `lapjvx_batch`; each batch item yields one (K_b, 2)
    int64 array of (row, col) assignment pairs.

    Returns
    -------
    (totals, pairs_list) if return_cost else pairs_list.

    Raises
    ------
    ValueError
        If `costs` is not 3-dimensional.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    # Preallocated, index-addressed slots: safe to fill in any completion order.
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one batch item; returns (index, total-or-None, pairs).
        a2d = A[bi]
        if return_cost:
            total, pairs = _lapjvxa_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, pairs
        else:
            pairs = _lapjvxa_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
            )
            return bi, None, pairs

    if threads == 1 or B == 1:
        # Serial path: no pool overhead for trivial batches.
        for bi in range(B):
            i, t, P = work(bi)
            if return_cost:
                totals[i] = float(t)  # type: ignore
            pairs_list[i] = np.asarray(P, dtype=np.int64)
    else:
        with ThreadPoolExecutor(max_workers=threads) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, P = fut.result()
                if return_cost:
                    totals[i] = float(t)  # type: ignore
                pairs_list[i] = np.asarray(P, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
# Copyright (c) 2012-2025 Tomas Kazmar | BSD-2-Clause License
import numpy as np
from bisect import bisect_left
# import logging
from ._lapjv import _lapmod, FP_DYNAMIC_ as FP_DYNAMIC, LARGE_ as LARGE
def _pycrrt(n, cc, ii, kk, free_rows, x, y, v):
    """Column reduction and reduction transfer (pure-Python lapmod step).

    Operates on a sparse cost matrix in CSR-like form: for row i, its column
    indices are kk[ii[i]:ii[i+1]] and the matching costs cc[ii[i]:ii[i+1]].
    Mutates x (row -> col assignment), y (col -> row), v (column prices) and
    free_rows in place; returns the number of rows left unassigned.
    (Name per the original log tag: 'do_column_reduction_and_reduction_transfer'.)
    """
    x[:] = -1
    y[:] = -1
    v[:] = LARGE
    # Column reduction: v[j] becomes the column minimum, y[j] its row.
    for i in range(n):
        ks = slice(ii[i], ii[i+1])
        js = kk[ks]
        ccs = cc[ks]
        mask = ccs < v[js]
        js = js[mask]
        v[js] = ccs[mask]
        y[js] = i
    unique = np.empty((n,), dtype=bool)
    unique[:] = True
    # Walk columns backwards so each row keeps only one column; rows that
    # won multiple column minima are flagged non-unique.
    for j in range(n-1, -1, -1):
        i = y[j]
        # If row is not taken yet, initialize it with the minimum stored in y.
        if x[i] < 0:
            x[i] = j
        else:
            unique[i] = False
            y[j] = -1
    n_free_rows = 0
    for i in range(n):
        # Store unassigned row i.
        if x[i] < 0:
            free_rows[n_free_rows] = i
            n_free_rows += 1
        elif unique[i] and ii[i+1] - ii[i] > 1:
            # >1 check prevents choking on rows with a single entry
            # Transfer from an assigned row.
            j = x[i]
            # Find the current 2nd minimum of the reduced column costs:
            # (cost[i,j] - v[j]) for some j.
            ks = slice(ii[i], ii[i+1])
            js = kk[ks]
            minv = np.min(cc[ks][js != j] - v[js][js != j])
            v[j] -= minv
    return n_free_rows
def find_minima(indices, values):
    """Return ``(j1, v1, j2, v2)``: index/value of the smallest entry and of
    the runner-up in the parallel sequences *indices*/*values*.

    When no second minimum exists, ``j2`` is -1 and ``v2`` is LARGE; an empty
    input yields ``(0, LARGE, -1, LARGE)``.
    """
    j2, v2 = -1, LARGE
    if len(indices) > 0:
        j1, v1 = indices[0], values[0]
    else:
        j1, v1 = 0, LARGE
    for idx, val in zip(indices[1:], values[1:]):
        if val >= v2:
            continue
        if val < v1:
            # New overall minimum: previous best becomes the runner-up.
            j1, v1, j2, v2 = idx, val, j1, v1
        else:
            j2, v2 = idx, val
    return j1, v1, j2, v2
def _pyarr(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augmenting row reduction (pure-Python lapmod step).

    For each free row, finds the two smallest reduced costs in that row
    (via `find_minima`), assigns the row to the best column — possibly
    displacing the column's previous row — and adjusts the column price.
    Mutates x, y, v and free_rows in place; returns the number of rows
    still free afterwards. (Original log tag: 'do_augmenting_row_reduction'.)
    """
    current = 0
    new_free_rows = 0
    while current < n_free_rows:
        free_i = free_rows[current]
        current += 1
        ks = slice(ii[free_i], ii[free_i+1])
        js = kk[ks]
        j1, v1, j2, v2 = find_minima(js, cc[ks] - v[js])
        i0 = y[j1]
        # Price that would make j1's reduced cost equal to the runner-up's.
        v1_new = v[j1] - (v2 - v1)
        v1_lowers = v1_new < v[j1]
        if v1_lowers:
            v[j1] = v1_new
        elif i0 >= 0 and j2 != -1:  # i0 is assigned, try j2
            j1 = j2
            i0 = y[j2]
        x[free_i] = j1
        y[j1] = free_i
        if i0 >= 0:
            # A row was displaced from column j1.
            if v1_lowers:
                # Continue the augmenting chain from the displaced row now.
                current -= 1
                free_rows[current] = i0
            else:
                # Stop the chain; keep the displaced row for a later pass.
                free_rows[new_free_rows] = i0
                new_free_rows += 1
    return new_free_rows
def binary_search(data, key):
    """Locate *key* in the sorted sequence *data*.

    Returns the index of an exact match, or ``None`` when *key* is absent.
    """
    pos = bisect_left(data, key)
    if pos == len(data) or data[pos] != key:
        return None
    return pos
def _find(hi, d, cols, y):
    """Partition the TODO portion of *cols* to collect the next SCAN set.

    Starting at position *hi*, moves every column whose distance d[j] equals
    the running minimum into cols[lo:hi] by in-place swaps. Returns
    (minv, new_hi, cols): the minimum distance, the end of the SCAN region,
    and the (mutated) cols array. *y* is unused here; the parameter is kept
    for signature parity with the caller (`find_path`).
    """
    lo, hi = hi, hi + 1
    minv = d[cols[lo]]
    # XXX: anytime this happens to be NaN, i'm screwed...
    # assert not np.isnan(minv)
    for k in range(hi, len(cols)):
        j = cols[k]
        if d[j] <= minv:
            # New minimum found, trash the new SCAN columns found so far.
            if d[j] < minv:
                hi = lo
                minv = d[j]
            # Swap j into the SCAN region (order of targets matters when k == hi).
            cols[k], cols[hi] = cols[hi], j
            hi += 1
    return minv, hi, cols
def _scan(n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v):
    """Scan SCAN columns, relaxing distances of the remaining TODO columns.

    For each SCAN column j (cols[lo:hi]) with assigned row i = y[j], updates
    the tentative distances d and predecessors pred of TODO columns reachable
    through row i. Returns (final_j, lo, hi, d, cols, pred): final_j is an
    unassigned column whose distance already equals minv (path complete), or
    -1 when the SCAN set is exhausted.
    """
    # Scan all TODO columns.
    while lo != hi:
        j = cols[lo]
        lo += 1
        i = y[j]
        # Sparse lookup of entry (i, j); rows may lack some columns entirely.
        kj = binary_search(kk[ii[i]:ii[i+1]], j)
        if kj is None:
            continue
        kj = ii[i] + kj
        # Reduced cost of the scanned edge, offset by the current minimum.
        h = cc[kj] - v[j] - minv
        for k in range(hi, n):
            j = cols[k]
            kj = binary_search(kk[ii[i]:ii[i+1]], j)
            if kj is None:
                continue
            kj = ii[i] + kj
            cred_ij = cc[kj] - v[j] - h
            if cred_ij < d[j]:
                d[j] = cred_ij
                pred[j] = i
                if cred_ij == minv:
                    # Distance can't improve further; unassigned => path found.
                    if y[j] < 0:
                        return j, None, None, d, cols, pred
                    # Otherwise promote j into the SCAN region.
                    cols[k] = cols[hi]
                    cols[hi] = j
                    hi += 1
    return -1, lo, hi, d, cols, pred
def find_path(n, cc, ii, kk, start_i, y, v):
    """Shortest augmenting path search from free row start_i.

    Alternates between _find (collect the columns tied at the minimum
    tentative distance) and _scan (relax distances of the remaining
    columns) until an unassigned column is reached.  Dual prices v of the
    READY columns (cols[:n_ready]) are updated in place before returning.

    Returns (final_j, pred): the unassigned column that terminates the
    path, and the predecessor-row array used to backtrack it.
    """
    # log = logging.getLogger('find_path')
    cols = np.arange(n, dtype=int)
    pred = np.empty((n,), dtype=int)
    # Every column's initial predecessor is the starting row itself.
    pred[:] = start_i
    d = np.empty((n,), dtype=float)
    d[:] = LARGE
    # Seed distances with the reduced costs of start_i's stored edges.
    ks = slice(ii[start_i], ii[start_i+1])
    js = kk[ks]
    d[js] = cc[ks] - v[js]
    # log.debug('d = %s', d)
    minv = LARGE
    lo, hi = 0, 0
    n_ready = 0
    final_j = -1
    while final_j == -1:
        # No SCAN columns, find new ones.
        if lo == hi:
            # log.debug('%d..%d -> find', lo, hi)
            # log.debug('cols = %s', cols)
            # Columns before lo become READY (their distance is final).
            n_ready = lo
            minv, hi, cols = _find(hi, d, cols, y)
            # log.debug('%d..%d -> check', lo, hi)
            # log.debug('cols = %s', cols)
            # log.debug('y = %s', y)
            for h in range(lo, hi):
                # If any of the new SCAN columns is unassigned, use it.
                if y[cols[h]] < 0:
                    final_j = cols[h]
        if final_j == -1:
            # log.debug('%d..%d -> scan', lo, hi)
            final_j, lo, hi, d, cols, pred = _scan(
                n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v)
            # log.debug('d = %s', d)
            # log.debug('cols = %s', cols)
            # log.debug('pred = %s', pred)
    # Update prices for READY columns.
    for k in range(n_ready):
        j0 = cols[k]
        v[j0] += d[j0] - minv
    assert final_j >= 0
    assert final_j < n
    return final_j, pred
def _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augmentation phase: assign every remaining free row.

    For each free row, finds a shortest augmenting path (find_path) and
    flips the assignments along it by backtracking through pred.  Updates
    x, y and the dual prices v in place.
    """
    # log = logging.getLogger('augment')
    for free_i in free_rows[:n_free_rows]:
        # log.debug('looking at free_i=%s', free_i)
        j, pred = find_path(n, cc, ii, kk, free_i, y, v)
        # Augment the path starting from column j and backtracking to free_i.
        i = -1
        while i != free_i:
            # log.debug('augment %s', j)
            # log.debug('pred = %s', pred)
            i = pred[j]
            assert i >= 0
            assert i < n
            # log.debug('y[%d]=%d -> %d', j, y[j], i)
            y[j] = i
            # Simultaneously assign column j to row i and pick up the column
            # previously held by row i for the next backtracking step.
            j, x[i] = x[i], j
def check_cost(n, cc, ii, kk):
    """Validate the sparse cost matrix; raise ValueError when unusable.

    Requires at least one row and one stored column entry, and all stored
    costs in the half-open range [0, LARGE).
    """
    if n == 0:
        raise ValueError('Cost matrix has zero rows.')
    if len(kk) == 0:
        raise ValueError('Cost matrix has zero columns.')
    if cc.min() < 0:
        raise ValueError('Cost matrix values must be non-negative.')
    if cc.max() >= LARGE:
        raise ValueError(
            'Cost matrix values must be less than %s' % LARGE)
def get_cost(n, cc, ii, kk, x0):
    """Total cost of the assignment x0 (x0[i] = column assigned to row i).

    Returns np.inf when any assigned pair has no stored (finite) entry in
    the sparse matrix.
    """
    total = 0
    for row, col in enumerate(x0):
        pos = binary_search(kk[ii[row]:ii[row+1]], col)
        if pos is None:
            # Row matched to a column without a finite cost: infeasible.
            return np.inf
        total += cc[ii[row] + pos]
    return total
def lapmod(n, cc, ii, kk, fast=True, return_cost=True, fp_version=FP_DYNAMIC):
    """Solve sparse linear assignment problem using Jonker-Volgenant algorithm.

    n: number of rows of the assignment cost matrix
    cc: 1D array of all finite elements of the assignement cost matrix
    ii: 1D array of indices of the row starts in cc. The following must hold:
        ii[0] = 0 and ii[n] = len(cc).
    kk: 1D array of the column indices so that:
        cost[i, kk[ii[i] + k]] == cc[ii[i] + k].
        Indices within one row must be sorted.
    fast: when True (the default) use the compiled solver; otherwise run the
        pure-Python reference implementation [default: True]
    return_cost: whether or not to return the assignment cost
    fp_version: which variant of the compiled path-finding routine to use;
        passed through to the compiled solver (ignored when fast is not True)
        [default: FP_DYNAMIC]

    Returns (opt, x, y) where:
      opt: cost of the assignment
      x: vector of columns assigned to rows
      y: vector of rows assigned to columns
    or (x, y) if return_cost is not True.

    Unmatched entries are marked by -1 in x/y.
    """
    # log = logging.getLogger('lapmod')
    check_cost(n, cc, ii, kk)
    if fast is True:
        # log.debug('[----CR & RT & ARR & augmentation ----]')
        x, y = _lapmod(n, cc, ii, kk, fp_version=fp_version)
    else:
        # Pure-Python path: normalize inputs to the dtypes the helpers expect.
        cc = np.ascontiguousarray(cc, dtype=np.float64)
        ii = np.ascontiguousarray(ii, dtype=np.int32)
        kk = np.ascontiguousarray(kk, dtype=np.int32)
        x = np.empty((n,), dtype=np.int32)
        y = np.empty((n,), dtype=np.int32)
        v = np.empty((n,), dtype=np.float64)
        free_rows = np.empty((n,), dtype=np.int32)
        # log.debug('[----Column reduction & reduction transfer----]')
        n_free_rows = _pycrrt(n, cc, ii, kk, free_rows, x, y, v)
        # Up to two rounds of augmenting row reduction, stopping early once
        # every row is assigned.
        for it in range(2):
            if n_free_rows == 0:
                break
            # log.debug('[---Augmenting row reduction (iteration: %d)---]', it)
            n_free_rows = _pyarr(
                n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
        if n_free_rows != 0:
            # log.info('[----Augmentation----]')
            _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
    if return_cost is True:
        return get_cost(n, cc, ii, kk, x), x, y
    return x, y

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display