lapx
Advanced tools
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Tuple, Union | ||
| from ._lapjv import lapjv as _lapjv | ||
def lapjv(
    cost: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Solve the Linear Assignment Problem with the Jonker-Volgenant algorithm.

    Thin wrapper over the Cython kernel; returns lapjv-style mapping vectors:
      - x[i] is the column assigned to row i, or -1 if row i is unassigned.
      - y[j] is the row assigned to column j, or -1 if column j is unassigned.

    Parameters
    ----------
    cost : np.ndarray, shape (N, M)
        2D cost matrix; cost[i, j] is the cost of pairing row i with column j.
        Any float dtype is accepted; a single contiguous float64 buffer is
        used internally when needed.
    extend_cost : bool, default False
        Allow rectangular inputs by zero-padding to a square matrix
        (see the unified augmentation policy below).
    cost_limit : float, default np.inf
        When finite, the problem is augmented to size (N + M) with sentinel
        edges of cost_limit / 2 and a bottom-right zero block, modeling a
        per-edge "reject" cost; rectangular inputs are then accepted even
        with extend_cost=False.
    return_cost : bool, default True
        Prepend the total assignment cost, computed from the ORIGINAL
        (un-augmented/unpadded) input, to the returned tuple.

    Returns
    -------
    (total_cost, x, y) when return_cost is True, else (x, y), where
    x is int32 with shape (N,) and y is int32 with shape (M,);
    -1 marks an unassigned row/column.

    Notes
    -----
    Unified augmentation policy:
      - cost_limit < inf: always augment to (N + M) (rectangular allowed).
      - else if N != M or extend_cost=True: zero-pad to max(N, M) square.
      - else: solve the given square matrix directly.
    Orientation is normalized internally (kernel works with rows <= cols);
    outputs are mapped back to the ORIGINAL orientation. Zero-sized inputs
    yield 0.0 (if requested) and all -1 mappings. This wrapper forwards to
    the Cython implementation without altering dtypes.
    """
    # All normalization (orientation, padding, augmentation) happens inside
    # the Cython implementation; forward the arguments untouched.
    options = dict(extend_cost=extend_cost, cost_limit=cost_limit, return_cost=return_cost)
    return _lapjv(cost, **options)
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Tuple, Union | ||
| from ._lapjvc import lapjvc as _lapjvc # type: ignore | ||
def lapjvc(
    cost: np.ndarray,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Solve the LAP with the classic dense Jonker-Volgenant algorithm.

    Thin wrapper around the C++ binding. Produces row/column index arrays
    in the same ordering convention as scipy.optimize.linear_sum_assignment.

    Parameters
    ----------
    cost : np.ndarray, shape (M, N)
        2D cost matrix. Supported dtypes: int32, int64, float32, float64.
        Rectangular inputs are handled internally (the dense solver pads as
        needed); NaN entries in float inputs mark forbidden assignments.
    return_cost : bool, default True
        When True, return (total_cost, row_indices, col_indices);
        otherwise return (row_indices, col_indices) only.

    Returns
    -------
    total_cost : float, optional
        Sum of cost at the selected (row, col) pairs (only if return_cost).
    row_indices, col_indices : np.ndarray, shape (K,)
        Matched row and column indices (platform-default integer dtype).

    Notes
    -----
    This is the classic dense JV routine; for very large, sparse, or
    otherwise structured problems, prefer the lapjv/lapjvx variants.
    """
    # The binding performs padding and NaN handling itself; just forward.
    return _lapjvc(cost, return_cost=return_cost)
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import os | ||
| import numpy as np | ||
| from typing import List, Tuple, Union | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from ._lapjvs_wp import lapjvs as _lapjvs_single | ||
| from ._lapjvs_wp import lapjvsa as _lapjvsa_single | ||
| def _normalize_threads(n_threads: int) -> int: | ||
| if n_threads is None or n_threads == 0: | ||
| cpu = os.cpu_count() or 1 | ||
| return max(1, int(cpu)) | ||
| return max(1, int(n_threads)) | ||
def _solve_one_jvs(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    jvx_like: bool,
    return_cost: bool,
):
    """Run the proven single-instance lapjvs wrapper on one 2D cost matrix.

    The underlying native kernel releases the GIL internally, so concurrent
    calls from a thread pool can overlap.
    """
    return _lapjvs_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        jvx_like=jvx_like,
        prefer_float32=prefer_float32,
    )
def _solve_one_jvsa(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    return_cost: bool,
):
    """Run the single-instance lapjvsa wrapper on one 2D cost matrix."""
    return _lapjvsa_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        prefer_float32=prefer_float32,
    )
def lapjvs_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """Batched lapjvs solver with a thread pool.

    Runs `lapjvs` on each (N, M) slice of a (B, N, M) batch and aggregates
    the per-instance results, preserving batch order in the outputs.

    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices (float32/float64); each `costs[b]` is one
        LAP instance.
    extend_cost : bool, default False
        If True, rectangular instances are solved via internal zero-padding;
        if False, each instance must be square or a ValueError is raised.
        Forwarded to the single-instance solver.
    return_cost : bool, default True
        If True, return per-instance totals as the first output.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`; actual
        workers are capped to the batch size.
    prefer_float32 : bool, default True
        Hint to run each kernel in float32 (forwarded to the single solver).

    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
            Total cost per instance, computed from the ORIGINAL matrices.
        rows_list : List[np.ndarray[int64]]
            Per instance, assigned row indices (length K_b).
        cols_list : List[np.ndarray[int64]]
            Per instance, assigned col indices (length K_b).
    Else:
        rows_list, cols_list

    Raises
    ------
    ValueError
        If `costs` is not 3D, or an instance is rectangular while
        `extend_cost=False`.

    Notes
    -----
    The native kernel releases the GIL, so multiple threads can speed up
    large batches. Totals are accumulated in float64.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one instance; returns its batch index for ordered reassembly.
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _solve_one_jvs(  # type: ignore
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
                jvx_like=True, return_cost=True
            )
            return bi, total, rows, cols
        rows, cols = _solve_one_jvs(  # type: ignore
            a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
            jvx_like=True, return_cost=False
        )
        return bi, None, rows, cols

    def _store(idx: int, t, r, c) -> None:
        # Shared between sequential and threaded paths.
        if return_cost:
            totals[idx] = float(t)  # type: ignore
        rows_list[idx] = np.asarray(r, dtype=np.int64)
        cols_list[idx] = np.asarray(c, dtype=np.int64)

    if B == 0:
        # Fix: an empty batch previously reached ThreadPoolExecutor with
        # max_workers=0 (when threads > 1), which raises ValueError.
        pass
    elif threads == 1 or B == 1:
        for bi in range(B):
            idx, t, r, c = work(bi)
            _store(idx, t, r, c)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, r, c = fut.result()
                _store(idx, t, r, c)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    return rows_list, cols_list
def lapjvsa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """Batched lapjvsa solver, returning (K_b, 2) pair arrays per instance.

    Runs `lapjvsa` on each (N, M) slice of a (B, N, M) batch and aggregates
    the results while preserving batch order.

    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices.
    extend_cost : bool, default False
        If True, rectangular instances are solved via internal zero-padding.
    return_cost : bool, default True
        If True, include per-instance totals as the first returned array.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`.
    prefer_float32 : bool, default True
        Forwarded to the single-instance solver.

    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
        pairs_list : List[np.ndarray[int64] with shape (K_b, 2)]
    Else:
        pairs_list

    Raises
    ------
    ValueError
        If `costs` is not 3D, or an instance is rectangular while
        `extend_cost=False`.

    Notes
    -----
    See `lapjvsa` for dtype handling and total-cost accumulation; results
    are reassembled in batch order irrespective of threading.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one instance; returns its batch index for ordered reassembly.
        a2d = A[bi]
        if return_cost:
            total, pairs = _solve_one_jvsa(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=True
            )
            return bi, total, pairs
        pairs = _solve_one_jvsa(
            a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=False
        )
        return bi, None, pairs

    def _store(idx: int, t, P) -> None:
        # Shared between sequential and threaded paths.
        if return_cost:
            totals[idx] = float(t)  # type: ignore
        pairs_list[idx] = np.asarray(P, dtype=np.int64)

    if B == 0:
        # Fix: an empty batch previously reached ThreadPoolExecutor with
        # max_workers=0 (when threads > 1), which raises ValueError.
        pass
    elif threads == 1 or B == 1:
        for bi in range(B):
            idx, t, P = work(bi)
            _store(idx, t, P)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, P = fut.result()
                _store(idx, t, P)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Optional, Tuple, Union | ||
| from ._lapjvs import lapjvs_native as _lapjvs_native # type: ignore | ||
| from ._lapjvs import lapjvs_float32 as _lapjvs_float32 # type: ignore | ||
| from ._lapjvs import lapjvsa_native as _lapjvsa_native # type: ignore | ||
| from ._lapjvs import lapjvsa_float32 as _lapjvsa_float32 # type: ignore | ||
def lapjvs(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    jvx_like: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray]
]:
    """Solve the LAP with a high-performance JV solver and flexible output.

    This function wraps a high-performance JV solver and provides flexible
    I/O to match either lapjv-style vector outputs (x, y) or lapjvx/SciPy-style
    pair lists (rows, cols). It handles rectangular inputs by zero-padding to a
    square matrix internally when requested.

    Parameters
    ----------
    cost : np.ndarray, shape (n, m)
        The cost matrix. Must be 2D and a real floating dtype. Values are treated
        as minimization costs. Rectangular matrices are supported via internal
        zero-padding when `extend_cost=True` or `extend_cost=None and n != m`.
    extend_cost : Optional[bool], default None
        Controls how rectangular inputs are handled:
        - True: Always zero-pad to a square internally (if needed).
        - False: Require a square matrix, otherwise raise ValueError.
        - None: Auto mode; pad iff the input is rectangular.
    return_cost : bool, default True
        If True, include the total assignment cost as the first return value.
        The total is always recomputed from the ORIGINAL input array `cost`
        (float64 accumulation) to match previous numeric behavior.
    jvx_like : bool, default True
        Selects the output format.
        - True: Return lapjvx/SciPy-style indexing arrays:
            return_cost=True  -> (total_cost: float, rows: (k,), cols: (k,))
            return_cost=False -> (rows: (k,), cols: (k,))
          Here, `rows[i]` is assigned to `cols[i]`.
        - False: Return lapjv-style mapping vectors:
            return_cost=True  -> (total_cost: float, x: (n0,), y: (m0,))
            return_cost=False -> (x: (n0,), y: (m0,))
          `x[i]` gives the assigned column for row i or -1 if unassigned.
          `y[j]` gives the assigned row for column j or -1 if unassigned.
    prefer_float32 : bool, default True
        When True, the solver kernel runs in float32 to reduce memory bandwidth
        and improve speed. When False and the input is float64, the kernel runs
        in float64. Regardless of kernel dtype, the returned total cost is
        recomputed against the ORIGINAL `cost` array.

    Returns
    -------
    See `jvx_like` and `return_cost` above for exact signatures. In all cases,
    index arrays are int64 and refer to indices in the ORIGINAL orientation of
    `cost` (not the internally transposed one).

    Raises
    ------
    ValueError
        - If `cost` is not a 2D array.
        - If `extend_cost=False` and the input matrix is rectangular.

    Notes
    -----
    - Rectangular handling:
        Internally, the solver normalizes orientation so that the working matrix
        has rows <= cols. Rectangular problems are modeled by zero-padding on
        the right and/or bottom to become square. Only assignments within the
        original (n, m) region are returned and used for the total.
    - Dtype:
        The kernel may operate in float32 or float64, but accumulation for the
        returned total cost is performed in float64 on the ORIGINAL `cost`.
    """
    # Keep the original array to compute the final cost from it (preserves previous behavior)
    A = np.asarray(cost)
    if A.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n0, m0 = A.shape
    transposed = False
    # Normalize orientation for performance: let the kernel see rows <= cols.
    if n0 > m0:
        B = np.ascontiguousarray(A.T)
        transposed = True
    else:
        B = np.ascontiguousarray(A)
    n, m = B.shape
    # Auto mode: pad iff the working matrix is rectangular.
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    # Choose backend and working dtype for the solver only.
    # Note: float32 kernel is used for all dtypes unless the caller explicitly
    # passed prefer_float32=False AND the input is already float64.
    use_float32_kernel = not ((prefer_float32 is False) and (B.dtype == np.float64))
    if use_float32_kernel:
        _kernel = _lapjvs_float32
        work_base = np.ascontiguousarray(B, dtype=np.float32)
    else:
        _kernel = _lapjvs_native
        work_base = np.ascontiguousarray(B, dtype=np.float64)

    def _rows_cols_from_x(x_vec: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        # Convert a lapjv-style x vector (B orientation) into (rows, cols)
        # pair arrays in the ORIGINAL (A) orientation.
        if x_vec.size == 0:
            return np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64)
        mask = x_vec >= 0
        rows_b = np.nonzero(mask)[0].astype(np.int64, copy=False)
        cols_b = x_vec[mask].astype(np.int64, copy=False)
        if not transposed:
            return rows_b, cols_b
        # Map back to original orientation (A): swap row/col
        return cols_b, rows_b

    if not extend:
        # Square: call solver directly on chosen dtype, compute total from ORIGINAL A
        if n != m:
            # Guard (per docstring): if extend_cost=False, require square input
            raise ValueError("extend_cost=False requires a square cost matrix")
        x_raw_obj, y_raw_obj = _kernel(work_base)
        x_raw_b = np.asarray(x_raw_obj, dtype=np.int64)
        if jvx_like:
            rows_a, cols_a = _rows_cols_from_x(x_raw_b)
            if return_cost:
                total = float(A[rows_a, cols_a].sum()) if rows_a.size else 0.0
                return total, rows_a, cols_a
            else:
                return rows_a, cols_a
        else:
            # Return vectors (x, y) in original orientation
            y_raw_b = np.asarray(y_raw_obj, dtype=np.int64)
            if not transposed:
                if return_cost:
                    # Square un-transposed case: every row is assumed assigned,
                    # so index directly with x_raw_b.
                    total = float(A[np.arange(n), x_raw_b].sum()) if n > 0 else 0.0
                    return total, x_raw_b, y_raw_b
                else:
                    return x_raw_b, y_raw_b
            # transposed square should not happen (n0==m0 implies no transpose), but keep safe mapping
            # Build pairs from B then map to A vectors
            rows_a, cols_a = _rows_cols_from_x(x_raw_b)
            x_out = np.full(n0, -1, dtype=np.int64)
            if rows_a.size:
                x_out[rows_a] = cols_a
            y_out = np.full(m0, -1, dtype=np.int64)
            if rows_a.size:
                y_out[cols_a] = rows_a
            if return_cost:
                total = float(A[rows_a, cols_a].sum()) if rows_a.size else 0.0
                return total, x_out, y_out
            else:
                return x_out, y_out
    # Rectangular: zero-pad to square (in B space), solve, map back; compute total from ORIGINAL A
    size = max(n, m)
    padded = np.empty((size, size), dtype=work_base.dtype)
    # copy original submatrix
    padded[:n, :m] = work_base
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    x_pad_obj, y_pad_obj = _kernel(padded)
    x_pad_b = np.asarray(x_pad_obj, dtype=np.int64)
    # Trim to original rectangle (B space), then map to A space if needed
    cols_pad_n = x_pad_b[:n]
    # Keep only assignments that land inside the original (n, m) region;
    # matches to padding columns are treated as "unassigned".
    mask_r_b = (cols_pad_n >= 0) & (cols_pad_n < m)
    # Prepare pairs in A-space for convenience
    if mask_r_b.any():
        rows_b = np.nonzero(mask_r_b)[0].astype(np.int64, copy=False)
        cols_b = cols_pad_n[mask_r_b].astype(np.int64, copy=False)
        if transposed:
            rows_a = cols_b
            cols_a = rows_b
        else:
            rows_a = rows_b
            cols_a = cols_b
    else:
        rows_a = np.empty((0,), dtype=np.int64)
        cols_a = np.empty((0,), dtype=np.int64)
    if jvx_like:
        total = float(A[rows_a, cols_a].sum()) if (return_cost and rows_a.size) else 0.0
        return (total, rows_a, cols_a) if return_cost else (rows_a, cols_a)
    # lapjv-like outputs (vectorized) in ORIGINAL orientation
    x_out = np.full(n0, -1, dtype=np.int64)
    y_out = np.full(m0, -1, dtype=np.int64)
    if rows_a.size:
        x_out[rows_a] = cols_a
        y_out[cols_a] = rows_a
    if return_cost and rows_a.size:
        total = float(A[rows_a, cols_a].sum())
    else:
        total = 0.0
    return (total, x_out, y_out) if return_cost else (x_out, y_out)
def lapjvsa(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray],
    np.ndarray
]:
    """Solve the LAP and return a compact (K, 2) array of assignment pairs.

    This variant returns a compact pairs array of shape (K, 2), where each row
    is a (row_index, col_index) assignment in the ORIGINAL orientation of the
    input matrix. Rectangular inputs are handled by internal zero-padding if
    requested.

    Parameters
    ----------
    cost : np.ndarray, shape (n, m)
        Cost matrix (float32/float64). Must be 2D.
    extend_cost : Optional[bool], default None
        Rectangular handling:
        - True: Zero-pad to square internally (if needed).
        - False: Require square, else raise ValueError.
        - None: Auto; pad iff rectangular.
    return_cost : bool, default True
        If True, include the total cost as the first return value. The total is
        computed from the ORIGINAL input matrix.
    prefer_float32 : bool, default True
        Hint to run the solver kernel in float32 for performance. When False and
        the input is float64, the kernel uses float64.

    Returns
    -------
    If return_cost is True:
        (total_cost: float, pairs: np.ndarray[int64] with shape (K, 2))
    Else:
        pairs: np.ndarray[int64] with shape (K, 2)

    Raises
    ------
    ValueError
        If `cost` is not 2D, or if `extend_cost=False` and `cost` is rectangular.

    Notes
    -----
    - Orientation is normalized internally so the kernel sees rows <= cols.
      Returned pairs are always mapped back to the ORIGINAL orientation.
    - Pairs only include assignments within the original (n, m) region for
      rectangular inputs.
    - Total is accumulated in float64 from the ORIGINAL `cost`.
    """
    A = np.asarray(cost)
    if A.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n0, m0 = A.shape
    transposed = False
    # Normalize orientation for performance
    if n0 > m0:
        B = np.ascontiguousarray(A.T)
        transposed = True
    else:
        B = np.ascontiguousarray(A)
    n, m = B.shape
    # Auto mode: pad iff the working matrix is rectangular.
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    # Select dtype/backend
    # float32 kernel is used unless the caller explicitly passed
    # prefer_float32=False AND the input is already float64.
    use_f32 = not ((prefer_float32 is False) and (B.dtype == np.float64))
    wdtype = np.float32 if use_f32 else (B.dtype if B.dtype in (np.float32, np.float64) else np.float64)
    if not extend:
        if n != m:
            raise ValueError("extend_cost=False requires a square cost matrix")
        work = np.ascontiguousarray(B, dtype=wdtype)
        pairs_b_obj = (_lapjvsa_float32(work) if use_f32 else _lapjvsa_native(work))
        pairs_b = np.asarray(pairs_b_obj, dtype=np.int64)
        # Map pairs back to original orientation
        if transposed and pairs_b.size:
            pairs_a = pairs_b[:, ::-1].astype(np.int64, copy=False)
        else:
            pairs_a = pairs_b
        if return_cost:
            if pairs_a.size:
                r = pairs_a[:, 0]; c = pairs_a[:, 1]
                # Total is accumulated from the ORIGINAL array A, not the
                # (possibly down-cast) working copy.
                total = float(A[r, c].sum())
            else:
                total = 0.0
            return total, pairs_a
        return pairs_a
    # Rectangular: zero-pad in B space, solve, trim, map back to A
    size = max(n, m)
    padded = np.empty((size, size), dtype=wdtype)
    padded[:n, :m] = B.astype(wdtype, copy=False)
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    pairs_pad_b_obj = (_lapjvsa_float32(padded) if use_f32 else _lapjvsa_native(padded))
    pairs_pad_b = np.asarray(pairs_pad_b_obj, dtype=np.int64)
    if pairs_pad_b.size == 0 or n == 0 or m == 0:
        # Degenerate input: no assignments possible.
        pairs_a = np.empty((0, 2), dtype=np.int64)
        total = 0.0
    else:
        r_b = pairs_pad_b[:, 0]
        c_b = pairs_pad_b[:, 1]
        # Keep only pairs that land inside the original (n, m) region;
        # matches into the zero-padding are discarded.
        mask_b = (r_b >= 0) & (r_b < n) & (c_b >= 0) & (c_b < m)
        if mask_b.any():
            pairs_b = np.stack([r_b[mask_b], c_b[mask_b]], axis=1).astype(np.int64, copy=False)
            # Map back to A orientation if needed
            pairs_a = pairs_b[:, ::-1] if transposed else pairs_b
            if return_cost and pairs_a.size:
                total = float(A[pairs_a[:, 0], pairs_a[:, 1]].sum())
            else:
                total = 0.0
        else:
            pairs_a = np.empty((0, 2), dtype=np.int64)
            total = 0.0
    return (total, pairs_a) if return_cost else pairs_a
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import os | ||
| import numpy as np | ||
| from typing import List, Tuple, Union | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from ._lapjvx import lapjvx as _lapjvx_single # type: ignore | ||
| from ._lapjvx import lapjvxa as _lapjvxa_single # type: ignore | ||
| def _normalize_threads(n_threads: int) -> int: | ||
| if not n_threads: | ||
| return max(1, int(os.cpu_count() or 1)) | ||
| return max(1, int(n_threads)) | ||
def lapjvx_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """Batched lapjvx solver with a thread pool.

    Applies the JVX-style solver (`lapjvx`) across a batch of cost matrices
    and aggregates the results, preserving batch order and supporting
    multi-threaded execution.

    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices (float32/float64).
    extend_cost : bool, default False
        If True, rectangular matrices are handled via internal zero-padding;
        if False, instances must be square.
    cost_limit : float, default np.inf
        Per-instance threshold to prune/limit assignments, forwarded to the
        underlying `lapjvx` implementation.
    return_cost : bool, default True
        If True, returns per-instance totals first.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`.

    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
        rows_list : List[np.ndarray[int64]] of length B
        cols_list : List[np.ndarray[int64]] of length B
    Else:
        rows_list, cols_list

    Raises
    ------
    ValueError
        If `costs` is not 3D, or an instance is rectangular while
        `extend_cost=False`.

    Notes
    -----
    See the single-instance `lapjvx` for detailed behavior around
    `extend_cost` and `cost_limit`.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one instance; returns its batch index for ordered reassembly.
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _lapjvx_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, rows, cols
        rows, cols = _lapjvx_single(
            a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
        )
        return bi, None, rows, cols

    def _store(i: int, t, r, c) -> None:
        # Shared between sequential and threaded paths.
        if return_cost:
            totals[i] = float(t)  # type: ignore
        rows_list[i] = np.asarray(r, dtype=np.int64)
        cols_list[i] = np.asarray(c, dtype=np.int64)

    if B == 0:
        # Fix: an empty batch previously reached ThreadPoolExecutor with
        # max_workers=0 (when threads > 1), which raises ValueError.
        pass
    elif threads == 1 or B == 1:
        for bi in range(B):
            i, t, r, c = work(bi)
            _store(i, t, r, c)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, r, c = fut.result()
                _store(i, t, r, c)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    return rows_list, cols_list
def lapjvxa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """Batched lapjvxa solver, returning (K_b, 2) pair arrays per instance.

    Parameters
    ----------
    costs : np.ndarray, shape (B, N, M)
        Batch of cost matrices.
    extend_cost : bool, default False
        If True, rectangular matrices are solved by internal zero-padding.
    cost_limit : float, default np.inf
        Forwarded to `lapjvxa` to limit/prune assignments.
    return_cost : bool, default True
        If True, includes per-instance totals as the first returned array.
    n_threads : int, default 0
        Number of worker threads. 0 or None uses `os.cpu_count()`.

    Returns
    -------
    If return_cost is True:
        totals : np.ndarray, shape (B,), float64
        pairs_list : List[np.ndarray[int64] with shape (K_b, 2)]
    Else:
        pairs_list

    Raises
    ------
    ValueError
        If `costs` is not 3D, or an instance is rectangular while
        `extend_cost=False`.

    Notes
    -----
    See `lapjvxa` for single-instance behavior and `cost_limit` semantics;
    results are returned in the original batch order.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one instance; returns its batch index for ordered reassembly.
        a2d = A[bi]
        if return_cost:
            total, pairs = _lapjvxa_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, pairs
        pairs = _lapjvxa_single(
            a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
        )
        return bi, None, pairs

    def _store(i: int, t, P) -> None:
        # Shared between sequential and threaded paths.
        if return_cost:
            totals[i] = float(t)  # type: ignore
        pairs_list[i] = np.asarray(P, dtype=np.int64)

    if B == 0:
        # Fix: an empty batch previously reached ThreadPoolExecutor with
        # max_workers=0 (when threads > 1), which raises ValueError.
        pass
    elif threads == 1 or B == 1:
        for bi in range(B):
            i, t, P = work(bi)
            _store(i, t, P)
    else:
        # Cap workers to the batch size.
        with ThreadPoolExecutor(max_workers=min(threads, B)) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, P = fut.result()
                _store(i, t, P)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Tuple, Union | ||
| from ._lapjvx import lapjvx as _lapjvx # type: ignore | ||
| from ._lapjvx import lapjvxa as _lapjvxa # type: ignore | ||
def lapjvx(
    cost: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Solve the LAP with the Jonker-Volgenant algorithm, returning
    scipy-style (row_indices, col_indices) assignment vectors.

    Parameters
    ----------
    cost : np.ndarray, shape (N, M)
        2D cost matrix; any float dtype is accepted (a contiguous float64
        working buffer is used internally when required).
    extend_cost : bool, default False
        Allow rectangular inputs by zero-padding to a square matrix.
    cost_limit : float, default np.inf
        If finite, the problem is augmented to size (N+M) with sentinel
        edges of cost_limit/2 and a bottom-right zero block, modeling a
        per-edge reject cost (this also permits rectangular inputs).
    return_cost : bool, default True
        If True, the total assignment cost is returned first; the total is
        computed on the ORIGINAL (un-augmented, unpadded) input.

    Returns
    -------
    (total_cost, row_indices, col_indices) when return_cost is True,
    otherwise (row_indices, col_indices). Indices are mapped back to the
    ORIGINAL matrix orientation; row_indices is int64, col_indices is
    typically int32 (follows the Cython implementation / platform).

    Notes
    -----
    Augmentation policy: a finite cost_limit augments to (N+M); otherwise a
    rectangular input (or extend_cost=True) is zero-padded to a square
    matrix of size max(N, M); a square input runs as-is.
    """
    # All heavy lifting happens in the compiled kernel; this wrapper only
    # forwards the arguments unchanged.
    return _lapjvx(
        cost,
        extend_cost=extend_cost,
        cost_limit=cost_limit,
        return_cost=return_cost,
    )
def lapjvxa(
    cost: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
) -> Union[
    Tuple[float, np.ndarray],
    np.ndarray,
]:
    """Solve the LAP like ``lapjvx`` but return assignments as a single
    compact (K, 2) ndarray of (row, col) pairs.

    Parameters
    ----------
    cost : np.ndarray, shape (N, M)
        2D cost matrix.
    extend_cost : bool, default False
        Allow rectangular inputs by zero-padding to a square matrix.
    cost_limit : float, default np.inf
        If finite, augment to (N+M) to model a per-edge reject cost
        (see ``lapjvx``).
    return_cost : bool, default True
        If True, the total cost (computed on the ORIGINAL input) comes first.

    Returns
    -------
    (total_cost, assignments) when return_cost is True, otherwise just
    assignments: an int32 array of shape (K, 2) whose rows are
    (row_index, col_index) in the ORIGINAL orientation.
    """
    # Thin convenience wrapper over the compiled lapjvxa kernel.
    return _lapjvxa(
        cost,
        extend_cost=extend_cost,
        cost_limit=cost_limit,
        return_cost=return_cost,
    )
| # Copyright (c) 2012-2025 Tomas Kazmar | BSD-2-Clause License | ||
| import numpy as np | ||
| import numpy.typing as npt | ||
| from bisect import bisect_left | ||
| from typing import Tuple, Union | ||
| # import logging | ||
| from ._lapjv import _lapmod, FP_DYNAMIC_ as FP_DYNAMIC, LARGE_ as LARGE # type: ignore | ||
def _pycrrt(n, cc, ii, kk, free_rows, x, y, v):
    """Column reduction and reduction transfer (sparse JV preprocessing).

    Operates on the sparse CSR-like representation (cc: values, ii: row
    starts, kk: sorted column indices). Builds a partial assignment in
    x (row -> col) and y (col -> row), initializes the dual prices v, and
    writes the indices of still-unassigned rows into free_rows.
    Returns the number of free rows. x, y, v, free_rows are mutated in place.
    """
    # log = logging.getLogger('do_column_reduction_and_reduction_transfer')
    x[:] = -1
    y[:] = -1
    v[:] = LARGE
    # Column reduction: for each column j, v[j] becomes the minimum cost in
    # that column and y[j] the row attaining it (later rows overwrite ties).
    for i in range(n):
        ks = slice(ii[i], ii[i+1])
        js = kk[ks]
        ccs = cc[ks]
        mask = ccs < v[js]
        js = js[mask]
        v[js] = ccs[mask]
        y[js] = i
    # log.debug('v = %s', v)
    # for j in range(cost.shape[1]):
    # unique[i] stays True only if row i is the minimum for exactly one column.
    unique = np.empty((n,), dtype=bool)
    unique[:] = True
    for j in range(n-1, -1, -1):
        i = y[j]
        # If row is not taken yet, initialize it with the minimum stored in y.
        if x[i] < 0:
            x[i] = j
        else:
            unique[i] = False
            y[j] = -1
        # log.debug('bw %s %s %s %s', i, j, x, y)
    # log.debug('unique %s', unique)
    n_free_rows = 0
    for i in range(n):
        # Store unassigned row i.
        if x[i] < 0:
            free_rows[n_free_rows] = i
            n_free_rows += 1
        elif unique[i] and ii[i+1] - ii[i] > 1:
            # >1 check prevents choking on rows with a single entry
            # Transfer from an assigned row.
            j = x[i]
            # Find the current 2nd minimum of the reduced column costs:
            # (cost[i,j] - v[j]) for some j.
            ks = slice(ii[i], ii[i+1])
            js = kk[ks]
            minv = np.min(cc[ks][js != j] - v[js][js != j])
            # log.debug("v[%d] = %f - %f", j, v[j], minv)
            # Lower v[j] so the reduced cost of (i, j) equals the 2nd minimum.
            v[j] -= minv
    # log.debug('free: %s', free_rows[:n_free_rows])
    # log.debug('%s %s', x, v)
    return n_free_rows
def find_minima(indices, values):
    """Return (j1, v1, j2, v2): the column j1 holding the smallest value v1
    and the column j2 holding the second-smallest value v2.

    If `indices` is empty, (0, LARGE, -1, LARGE) is returned; if it has a
    single entry there is no second minimum and j2 stays -1.
    """
    # log = logging.getLogger('find_minima')
    # log.debug(sorted(zip(values, indices))[:2])
    if len(indices) > 0:
        best_j, best_v = indices[0], values[0]
    else:
        best_j, best_v = 0, LARGE
    second_j, second_v = -1, LARGE
    for j, h in zip(indices[1:], values[1:]):
        # log.debug('%d = %f %d = %f', best_j, best_v, second_j, second_v)
        if h >= second_v:
            continue
        if h < best_v:
            # New overall minimum: old best becomes the runner-up.
            second_j, second_v = best_j, best_v
            best_j, best_v = j, h
        else:
            # Between the two minima: only the runner-up changes.
            second_j, second_v = j, h
    # log.debug('%d = %f %d = %f', best_j, best_v, second_j, second_v)
    return best_j, best_v, second_j, second_v
def _pyarr(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augmenting row reduction over the first n_free_rows entries of free_rows.

    For each free row, looks at the two smallest reduced costs in the row and
    either lowers the dual price v[j1] (then immediately continues the
    alternating path from the displaced row) or takes the second-best column.
    Rows left unassigned are compacted to the front of free_rows; returns the
    new count of free rows. x, y, v, free_rows are mutated in place.
    """
    # log = logging.getLogger('do_augmenting_row_reduction')
    # log.debug('%s %s %s', x, y, v)
    current = 0
    # log.debug('free: %s', free_rows[:n_free_rows])
    new_free_rows = 0
    while current < n_free_rows:
        free_i = free_rows[current]
        # log.debug('current = %d', current)
        current += 1
        ks = slice(ii[free_i], ii[free_i+1])
        js = kk[ks]
        # Two smallest reduced costs (cc - v) over this row's entries.
        j1, v1, j2, v2 = find_minima(js, cc[ks] - v[js])
        i0 = y[j1]
        # Lowering v[j1] by the gap (v2 - v1) makes column j1 strictly best.
        v1_new = v[j1] - (v2 - v1)
        v1_lowers = v1_new < v[j1]
        # log.debug(
        #     '%d %d 1=%s,%f 2=%s,%f %f %s',
        #     free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers)
        if v1_lowers:
            v[j1] = v1_new
        elif i0 >= 0 and j2 != -1:  # i0 is assigned, try j2
            j1 = j2
            i0 = y[j2]
        # Assign free_i to the chosen column, possibly displacing row i0.
        x[free_i] = j1
        y[j1] = free_i
        if i0 >= 0:
            if v1_lowers:
                # Re-use the current slot: continue the path from i0 right away.
                current -= 1
                # log.debug('continue augmenting path from current %s %s %s')
                free_rows[current] = i0
            else:
                # log.debug('stop the augmenting path and keep for later')
                free_rows[new_free_rows] = i0
                new_free_rows += 1
    # log.debug('free: %s', free_rows[:new_free_rows])
    return new_free_rows
def binary_search(data, key):
    """Return the index of `key` in the sorted sequence `data`, or None."""
    # log = logging.getLogger('binary_search')
    pos = bisect_left(data, key)
    # log.debug('Found data[%d] for %d', pos, key)
    if pos == len(data) or data[pos] != key:
        return None
    return pos
| def _find(hi, d, cols, y): | ||
| lo, hi = hi, hi + 1 | ||
| minv = d[cols[lo]] | ||
| # XXX: anytime this happens to be NaN, i'm screwed... | ||
| # assert not np.isnan(minv) | ||
| for k in range(hi, len(cols)): | ||
| j = cols[k] | ||
| if d[j] <= minv: | ||
| # New minimum found, trash the new SCAN columns found so far. | ||
| if d[j] < minv: | ||
| hi = lo | ||
| minv = d[j] | ||
| cols[k], cols[hi] = cols[hi], j | ||
| hi += 1 | ||
| return minv, hi, cols | ||
def _scan(n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v):
    """Relax TODO columns from the current SCAN columns (Dijkstra-style step).

    Processes SCAN columns cols[lo:hi]; for each, relaxes the tentative
    distances d[j] of the TODO columns cols[hi:n] through the row assigned to
    the SCAN column, recording predecessors in pred. If a relaxation reaches
    an unassigned column at distance minv, returns early with
    (that column, None, None, d, cols, pred); otherwise returns
    (-1, lo, hi, d, cols, pred) with lo/hi advanced. d, cols, pred are
    mutated in place.
    """
    # log = logging.getLogger('_scan')
    # Scan all TODO columns.
    while lo != hi:
        j = cols[lo]
        lo += 1
        i = y[j]
        # log.debug('?%d kk[%d:%d]=%s', j, ii[i], ii[i+1], kk[ii[i]:ii[i+1]])
        # Locate entry (i, j) in the sparse row; skip if the edge is absent.
        kj = binary_search(kk[ii[i]:ii[i+1]], j)
        if kj is None:
            continue
        kj = ii[i] + kj
        # Offset so reduced costs below are measured relative to minv.
        h = cc[kj] - v[j] - minv
        # log.debug('i=%d j=%d kj=%s h=%f', i, j, kj, h)
        for k in range(hi, n):
            j = cols[k]
            kj = binary_search(kk[ii[i]:ii[i+1]], j)
            if kj is None:
                continue
            kj = ii[i] + kj
            cred_ij = cc[kj] - v[j] - h
            if cred_ij < d[j]:
                d[j] = cred_ij
                pred[j] = i
                if cred_ij == minv:
                    # Unassigned column at the minimum: augmenting path found.
                    if y[j] < 0:
                        return j, None, None, d, cols, pred
                    # Otherwise promote j from TODO to SCAN.
                    cols[k] = cols[hi]
                    cols[hi] = j
                    hi += 1
    return -1, lo, hi, d, cols, pred
def find_path(n, cc, ii, kk, start_i, y, v):
    """Find a shortest augmenting path from free row start_i to an
    unassigned column (sparse Dijkstra over reduced costs).

    Returns (final_j, pred): the unassigned column ending the path and the
    predecessor-row array to backtrack it. Dual prices v for READY columns
    are updated in place.
    """
    # log = logging.getLogger('find_path')
    # cols is partitioned as READY [0:n_ready) | SCAN [lo:hi) | TODO [hi:n).
    cols = np.arange(n, dtype=int)
    pred = np.empty((n,), dtype=int)
    pred[:] = start_i
    d = np.empty((n,), dtype=float)
    d[:] = LARGE
    # Seed distances with the reduced costs of start_i's own row entries.
    ks = slice(ii[start_i], ii[start_i+1])
    js = kk[ks]
    d[js] = cc[ks] - v[js]
    # log.debug('d = %s', d)
    minv = LARGE
    lo, hi = 0, 0
    n_ready = 0
    final_j = -1
    while final_j == -1:
        # No SCAN columns, find new ones.
        if lo == hi:
            # log.debug('%d..%d -> find', lo, hi)
            # log.debug('cols = %s', cols)
            n_ready = lo
            minv, hi, cols = _find(hi, d, cols, y)
            # log.debug('%d..%d -> check', lo, hi)
            # log.debug('cols = %s', cols)
            # log.debug('y = %s', y)
            for h in range(lo, hi):
                # If any of the new SCAN columns is unassigned, use it.
                if y[cols[h]] < 0:
                    final_j = cols[h]
        if final_j == -1:
            # log.debug('%d..%d -> scan', lo, hi)
            final_j, lo, hi, d, cols, pred = _scan(
                n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v)
            # log.debug('d = %s', d)
            # log.debug('cols = %s', cols)
            # log.debug('pred = %s', pred)
    # Update prices for READY columns.
    for k in range(n_ready):  # type: ignore
        j0 = cols[k]
        v[j0] += d[j0] - minv
    assert final_j >= 0
    assert final_j < n
    return final_j, pred
def _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augment along a shortest alternating path for every remaining free row.

    For each of the first n_free_rows entries of free_rows, finds a shortest
    augmenting path ending in an unassigned column and flips the assignments
    along it. x and y are mutated in place (v is updated inside find_path).
    """
    # log = logging.getLogger('augment')
    for row in free_rows[:n_free_rows]:
        # log.debug('looking at free row %s', row)
        end_j, pred = find_path(n, cc, ii, kk, row, y, v)
        # Backtrack from end_j through pred, flipping assignments until the
        # starting free row is reached.
        j = end_j
        i = -1
        while i != row:
            i = pred[j]
            assert i >= 0
            assert i < n
            # log.debug('y[%d]=%d -> %d', j, y[j], i)
            y[j] = i
            next_j = x[i]
            x[i] = j
            j = next_j
def check_cost(n, cc, ii, kk):
    """Validate the sparse cost representation; raise ValueError if invalid.

    Rejects empty problems, negative costs, and costs at or above LARGE.
    """
    if n == 0:
        raise ValueError('Cost matrix has zero rows.')
    if len(kk) == 0:
        raise ValueError('Cost matrix has zero columns.')
    lowest = cc.min()
    highest = cc.max()
    if lowest < 0:
        raise ValueError('Cost matrix values must be non-negative.')
    if highest >= LARGE:
        raise ValueError(
            'Cost matrix values must be less than %s' % LARGE)
def get_cost(n, cc, ii, kk, x0):
    """Total cost of the row->column assignment x0 under the sparse matrix.

    Returns np.inf as soon as any (row, x0[row]) edge is missing from the
    sparse representation.
    """
    total = 0
    for row, col in enumerate(x0):
        pos = binary_search(kk[ii[row]:ii[row + 1]], col)
        if pos is None:
            return np.inf
        total += cc[ii[row] + pos]
    return total
| # def lapmod(n, cc, ii, kk, fast=True, return_cost=True, fp_version=FP_DYNAMIC): | ||
def lapmod(
    n: int,
    cc: npt.NDArray[np.floating],
    ii: npt.NDArray[np.integer],
    kk: npt.NDArray[np.integer],
    fast: bool = True,
    return_cost: bool = True,
    fp_version: int = FP_DYNAMIC,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray],
]:
    """Solve sparse linear assignment problem using Jonker-Volgenant algorithm.

    n: number of rows of the assignment cost matrix
    cc: 1D array of all finite elements of the assignment cost matrix,
        stored row by row
    ii: 1D array of indices of the row starts in cc (length n+1). The
        following must hold: ii[0] = 0 and ii[n] = len(cc).
    kk: 1D array of the column indices so that:
        cost[i, kk[ii[i] + k]] == cc[ii[i] + k].
        Indices within one row must be sorted.
    fast: use the compiled kernel; if False, run the pure-Python path
        (column reduction, two augmenting-row-reduction passes, then
        augmentation) [default: True]
    return_cost: whether or not to return the assignment cost
    fp_version: forwarded to the compiled kernel; only used when fast is True
        [default: FP_DYNAMIC]
    Returns (opt, x, y) where:
      opt: cost of the assignment
      x: vector of columns assigned to rows
      y: vector of rows assigned to columns
    or (x, y) if return_cost is not True.
    """
    # log = logging.getLogger('lapmod')
    check_cost(n, cc, ii, kk)
    if fast is True:
        # log.debug('[----CR & RT & ARR & augmentation ----]')
        x, y = _lapmod(n, cc, ii, kk, fp_version=fp_version)
    else:
        cc = np.ascontiguousarray(cc, dtype=np.float64)
        ii = np.ascontiguousarray(ii, dtype=np.int32)
        kk = np.ascontiguousarray(kk, dtype=np.int32)
        x = np.empty((n,), dtype=np.int32)
        y = np.empty((n,), dtype=np.int32)
        v = np.empty((n,), dtype=np.float64)
        free_rows = np.empty((n,), dtype=np.int32)
        # log.debug('[----Column reduction & reduction transfer----]')
        n_free_rows = _pycrrt(n, cc, ii, kk, free_rows, x, y, v)
        # log.debug(
        #     'free, x, y, v: %s %s %s %s', free_rows[:n_free_rows], x, y, v)
        if n_free_rows == 0:
            # log.info('Reduction solved it.')
            if return_cost is True:
                return get_cost(n, cc, ii, kk, x), x, y
            else:
                return x, y
        for it in range(2):
            # log.debug('[---Augmenting row reduction (iteration: %d)---]', it)
            n_free_rows = _pyarr(
                n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
            # log.debug(
            #     'free, x, y, v: %s %s %s %s', free_rows[:n_free_rows], x, y, v)
            if n_free_rows == 0:
                # log.info('Augmenting row reduction solved it.')
                if return_cost is True:
                    return get_cost(n, cc, ii, kk, x), x, y
                else:
                    return x, y
        # log.info('[----Augmentation----]')
        _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
        # log.debug('x, y, v: %s %s %s', x, y, v)
    if return_cost is True:
        return get_cost(n, cc, ii, kk, x), x, y
    else:
        return x, y
+477
-452
| [](https://github.com/rathaROG/lapx/releases) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml) | ||
@@ -9,4 +9,4 @@ [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml) | ||
| `lapx` focuses more on real-world applications, and the [benchmark_batch.py](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_batch.py) | ||
| and [benchmark.py](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark.py) are **not** | ||
| `lapx` focuses more on real-world applications, and the [benchmark_batch.py](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_batch.py) | ||
| and [benchmark_single.py](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_single.py) are **not** | ||
| intended for scientific research or competitive evaluation. Instead, it provides a quick and accessible way for | ||
@@ -24,53 +24,68 @@ you to run benchmark tests on your own machine. Below, you will also find a collection of interesting results | ||
| git clone https://github.com/rathaROG/lapx.git | ||
| cd lapx/.github/test | ||
| cd lapx/benchmarks | ||
| python benchmark_batch.py | ||
| python benchmark.py | ||
| python benchmark_single.py | ||
| ``` | ||
| Note: [SciPy](https://pypi.org/project/scipy/) is used as the baseline in the benchmark. | ||
| Note: [SciPy](https://pypi.org/project/scipy/) is used as the baseline in the single-matrix benchmark `benchmark_single.py`. | ||
| 📊 Some benchmark results using `lapx` [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) (2025/10/27): | ||
| 📊 Some benchmark results using `lapx` [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) (2025/10/27): | ||
| <details><summary>🗂️ Batch on my Windows 11 i9-13900KS (8 p-core + 8 e-core) + python 3.9.13:</summary> | ||
| <details><summary>🗂️ Batch on my local Windows 11 i9-13900KS (8 p-core + 8 e-core) + python 3.11.9:</summary> | ||
| ``` | ||
| # 50 x (3000x3000) | n_threads = 24 | ||
| Microsoft Windows [Version 10.0.26200.7019] | ||
| (c) Microsoft Corporation. All rights reserved. | ||
| CPU lapx-batch-jvx : cost=82.08230873, time=1.40321970s | ||
| CPU lapx-batch-jvs : cost=82.08230873, time=0.80294538s | ||
| CPU lapx-batch-jvxa : cost=82.08230873, time=1.40610409s | ||
| CPU lapx-batch-jvsa : cost=82.08230873, time=0.81906796s | ||
| CPU lapx-batch-jvsa64 : cost=82.08230873, time=1.42109323s | ||
| CPU lapx-loop-jvx : cost=82.08230873, time=11.01966000s | ||
| CPU lapx-loop-jvs : cost=82.08230873, time=8.18710470s | ||
| D:\DEV\lapx_all\tmp\lapx\benchmarks>python benchmark_batch.py | ||
| # 100 x (2000x2000) | n_threads = 24 | ||
| # 10 x (4000x4000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=164.54469568, time=0.81932855s | ||
| CPU lapx-batch-jvs : cost=164.54469568, time=0.58506370s | ||
| CPU lapx-batch-jvxa : cost=164.54469568, time=0.83581567s | ||
| CPU lapx-batch-jvsa : cost=164.54469568, time=0.59467125s | ||
| CPU lapx-batch-jvsa64 : cost=164.54469568, time=0.88178015s | ||
| CPU lapx-loop-jvx : cost=164.54469568, time=7.68291450s | ||
| CPU lapx-loop-jvs : cost=164.54469568, time=6.44884777s | ||
| CPU lapx-batch-jvx : cost=16.48859572, time=0.67588449s | ||
| CPU lapx-batch-jvs : cost=16.48859572, time=0.46411657s | ||
| CPU lapx-batch-jvxa : cost=16.48859572, time=0.71385884s | ||
| CPU lapx-batch-jvsa : cost=16.48859572, time=0.45670390s | ||
| CPU lapx-batch-jvsa64 : cost=16.48859572, time=0.70847058s | ||
| CPU lapx-loop-jvx : cost=16.48859572, time=3.95986462s | ||
| CPU lapx-loop-jvs : cost=16.48859572, time=2.66866994s | ||
| # 500 x (1000x2000) | n_threads = 24 | ||
| # 20 x (3000x2000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=291.25078928, time=0.91706204s | ||
| CPU lapx-batch-jvs : cost=291.25078928, time=0.79455686s | ||
| CPU lapx-batch-jvxa : cost=291.25078928, time=0.93096972s | ||
| CPU lapx-batch-jvsa : cost=291.25078928, time=0.79109597s | ||
| CPU lapx-batch-jvsa64 : cost=291.25078928, time=1.23274732s | ||
| CPU lapx-loop-jvx : cost=291.25078928, time=5.47222424s | ||
| CPU lapx-loop-jvs : cost=291.25078928, time=5.73832059s | ||
| CPU lapx-batch-jvx : cost=16.65042067, time=0.18923616s | ||
| CPU lapx-batch-jvs : cost=16.65042067, time=0.17624354s | ||
| CPU lapx-batch-jvxa : cost=16.65042067, time=0.18447852s | ||
| CPU lapx-batch-jvsa : cost=16.65042067, time=0.18925667s | ||
| CPU lapx-batch-jvsa64 : cost=16.65042067, time=0.18949389s | ||
| CPU lapx-loop-jvx : cost=16.65042067, time=0.85662770s | ||
| CPU lapx-loop-jvs : cost=16.65042067, time=1.05569839s | ||
| # 1000 x (1000x1000) | n_threads = 24 | ||
| # 50 x (2000x2000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=1641.72891905, time=1.18257976s | ||
| CPU lapx-batch-jvs : cost=1641.72891905, time=1.13616300s | ||
| CPU lapx-batch-jvxa : cost=1641.72891905, time=1.16668177s | ||
| CPU lapx-batch-jvsa : cost=1641.72891905, time=1.11944461s | ||
| CPU lapx-batch-jvsa64 : cost=1641.72891905, time=1.23001194s | ||
| CPU lapx-loop-jvx : cost=1641.72891905, time=13.90460992s | ||
| CPU lapx-loop-jvs : cost=1641.72891905, time=14.32015085s | ||
| CPU lapx-batch-jvx : cost=82.12386385, time=0.56725645s | ||
| CPU lapx-batch-jvs : cost=82.12386385, time=0.37664533s | ||
| CPU lapx-batch-jvxa : cost=82.12386385, time=0.57265162s | ||
| CPU lapx-batch-jvsa : cost=82.12386385, time=0.37772393s | ||
| CPU lapx-batch-jvsa64 : cost=82.12386385, time=0.61493921s | ||
| CPU lapx-loop-jvx : cost=82.12386385, time=4.46092606s | ||
| CPU lapx-loop-jvs : cost=82.12386385, time=3.49988031s | ||
| # 100 x (1000x2000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=58.19636934, time=0.18971944s | ||
| CPU lapx-batch-jvs : cost=58.19636934, time=0.16700149s | ||
| CPU lapx-batch-jvxa : cost=58.19636934, time=0.18943620s | ||
| CPU lapx-batch-jvsa : cost=58.19636934, time=0.16706610s | ||
| CPU lapx-batch-jvsa64 : cost=58.19636934, time=0.25204611s | ||
| CPU lapx-loop-jvx : cost=58.19636934, time=1.02838278s | ||
| CPU lapx-loop-jvs : cost=58.19636934, time=1.21967244s | ||
| # 500 x (1000x1000) | n_threads = 24 | ||
| CPU lapx-batch-jvx : cost=821.97407482, time=0.59273267s | ||
| CPU lapx-batch-jvs : cost=821.97407482, time=0.58274126s | ||
| CPU lapx-batch-jvxa : cost=821.97407482, time=0.58346224s | ||
| CPU lapx-batch-jvsa : cost=821.97407482, time=0.58098578s | ||
| CPU lapx-batch-jvsa64 : cost=821.97407482, time=0.61520362s | ||
| CPU lapx-loop-jvx : cost=821.97407482, time=6.64442897s | ||
| CPU lapx-loop-jvs : cost=821.97407482, time=7.03527546s | ||
| ``` | ||
@@ -82,3 +97,3 @@ | ||
| https://github.com/rathaROG/lapx/actions/runs/18851354956/job/53788494991 | ||
| https://github.com/rathaROG/lapx/actions/runs/18961613065/job/54149890164 | ||
@@ -89,17 +104,17 @@ ``` | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 2.13 x slower | ||
| * lapjv : ✅ Passed 🐌 2.85 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.17 x slower | ||
| * lapjvxa : ✅ Passed 🏆 1.6 x faster | ||
| * lapjvs : ✅ Passed 🐌 2.33 x slower | ||
| * lapjvsa : ✅ Passed 🐌 2.1 x slower | ||
| * lapjvc : ✅ Passed 🐌 1.64 x slower | ||
| * lapjv : ✅ Passed 🐌 5.53 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.68 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.81 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.56 x slower | ||
| * lapjvsa : ✅ Passed 🐌 3.36 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00001882s | ||
| 2. scipy ⭐ : 0.00003012s | ||
| 3. lapjvx : 0.00003522s | ||
| 4. lapjvsa : 0.00006335s | ||
| 5. lapjvc : 0.00006415s | ||
| 6. lapjvs : 0.00007016s | ||
| 7. lapjv : 0.00008579s | ||
| 1. scipy ⭐ : 0.00001024s | ||
| 2. lapjvc : 0.00001677s | ||
| 3. lapjvxa : 0.00001853s | ||
| 4. lapjvx : 0.00002740s | ||
| 5. lapjvsa : 0.00003439s | ||
| 6. lapjvs : 0.00003648s | ||
| 7. lapjv : 0.00005660s | ||
| ------------------------------- | ||
@@ -110,17 +125,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.5 x slower | ||
| * lapjv : ✅ Passed 🐌 1.9 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.3 x slower | ||
| * lapjvxa : ✅ Passed 🏆 1.15 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.92 x slower | ||
| * lapjvsa : ✅ Passed 🏆 1.79 x faster | ||
| * lapjvc : ✅ Passed 🐌 2.03 x slower | ||
| * lapjv : ✅ Passed 🐌 5.72 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.28 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.75 x slower | ||
| * lapjvs : ✅ Passed 🐌 2.7 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.04 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00000646s | ||
| 2. lapjvxa : 0.00001002s | ||
| 3. scipy ⭐ : 0.00001154s | ||
| 4. lapjvx : 0.00001498s | ||
| 5. lapjvc : 0.00001732s | ||
| 6. lapjv : 0.00002196s | ||
| 7. lapjvs : 0.00002215s | ||
| 1. scipy ⭐ : 0.00000664s | ||
| 2. lapjvsa : 0.00000690s | ||
| 3. lapjvxa : 0.00001165s | ||
| 4. lapjvc : 0.00001346s | ||
| 5. lapjvx : 0.00001517s | ||
| 6. lapjvs : 0.00001796s | ||
| 7. lapjv : 0.00003800s | ||
| ------------------------------- | ||
@@ -131,17 +146,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.82 x slower | ||
| * lapjv : ✅ Passed 🐌 3.92 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.34 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.69 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.3 x slower | ||
| * lapjvsa : ✅ Passed 🐌 5.28 x slower | ||
| * lapjvc : ✅ Passed 🐌 2.3 x slower | ||
| * lapjv : ✅ Passed 🐌 9.53 x slower | ||
| * lapjvx : ✅ Passed 🐌 3.62 x slower | ||
| * lapjvxa : ✅ Passed 🐌 3.04 x slower | ||
| * lapjvs : ✅ Passed 🐌 4.63 x slower | ||
| * lapjvsa : ✅ Passed 🐌 5.32 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. scipy ⭐ : 0.00000862s | ||
| 2. lapjvxa : 0.00001455s | ||
| 3. lapjvc : 0.00001566s | ||
| 4. lapjvx : 0.00002017s | ||
| 5. lapjvs : 0.00002845s | ||
| 6. lapjv : 0.00003373s | ||
| 7. lapjvsa : 0.00004552s | ||
| 1. scipy ⭐ : 0.00000537s | ||
| 2. lapjvc : 0.00001233s | ||
| 3. lapjvxa : 0.00001631s | ||
| 4. lapjvx : 0.00001947s | ||
| 5. lapjvs : 0.00002489s | ||
| 6. lapjvsa : 0.00002857s | ||
| 7. lapjv : 0.00005116s | ||
| ------------------------------- | ||
@@ -152,17 +167,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.87 x slower | ||
| * lapjv : ✅ Passed 🏆 1.19 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.62 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.35 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.33 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.32 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.94 x slower | ||
| * lapjv : ✅ Passed 🐌 1.34 x slower | ||
| * lapjvx : ✅ Passed 🏆 1.15 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.41 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.98 x slower | ||
| * lapjvsa : ✅ Passed 🏆 1.13 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00003084s | ||
| 2. lapjvx : 0.00004490s | ||
| 3. lapjvs : 0.00005469s | ||
| 4. lapjvsa : 0.00005475s | ||
| 5. lapjv : 0.00006076s | ||
| 6. scipy ⭐ : 0.00007253s | ||
| 7. lapjvc : 0.00013553s | ||
| 1. lapjvxa : 0.00003351s | ||
| 2. lapjvx : 0.00004110s | ||
| 3. lapjvsa : 0.00004164s | ||
| 4. scipy ⭐ : 0.00004721s | ||
| 5. lapjv : 0.00006310s | ||
| 6. lapjvc : 0.00009150s | ||
| 7. lapjvs : 0.00009357s | ||
| ------------------------------- | ||
@@ -173,17 +188,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🏆 1.19 x faster | ||
| * lapjv : ✅ Passed 🏆 2.1 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.77 x faster | ||
| * lapjvxa : ✅ Passed 🏆 4.34 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.3 x faster | ||
| * lapjvsa : ✅ Passed 🏆 5.58 x faster | ||
| * lapjvc : ✅ Passed 🏆 1.43 x faster | ||
| * lapjv : ✅ Passed 🏆 1.44 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.25 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.94 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.27 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.99 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00001290s | ||
| 2. lapjvxa : 0.00001658s | ||
| 3. lapjvx : 0.00002601s | ||
| 4. lapjvs : 0.00003129s | ||
| 5. lapjv : 0.00003426s | ||
| 6. lapjvc : 0.00006070s | ||
| 7. scipy ⭐ : 0.00007195s | ||
| 1. lapjvsa : 0.00002166s | ||
| 2. lapjvxa : 0.00002932s | ||
| 3. lapjvs : 0.00003803s | ||
| 4. lapjvx : 0.00003831s | ||
| 5. lapjv : 0.00005988s | ||
| 6. lapjvc : 0.00006028s | ||
| 7. scipy ⭐ : 0.00008634s | ||
| ------------------------------- | ||
@@ -194,17 +209,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.59 x slower | ||
| * lapjv : ✅ Passed 🏆 1.14 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.65 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.33 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.58 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.37 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.22 x slower | ||
| * lapjv : ✅ Passed 🏆 1.07 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.54 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.88 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.63 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.47 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00003199s | ||
| 2. lapjvx : 0.00004507s | ||
| 3. lapjvs : 0.00004723s | ||
| 4. lapjvsa : 0.00005446s | ||
| 5. lapjv : 0.00006505s | ||
| 6. scipy ⭐ : 0.00007443s | ||
| 7. lapjvc : 0.00011813s | ||
| 1. lapjvxa : 0.00004026s | ||
| 2. lapjvs : 0.00004654s | ||
| 3. lapjvx : 0.00004924s | ||
| 4. lapjvsa : 0.00005152s | ||
| 5. lapjv : 0.00007051s | ||
| 6. scipy ⭐ : 0.00007566s | ||
| 7. lapjvc : 0.00009201s | ||
| ------------------------------- | ||
@@ -215,17 +230,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 4.19 x slower | ||
| * lapjv : ✅ Passed 🏆 2.46 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.84 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.94 x faster | ||
| * lapjvs : ✅ Passed 🏆 4.37 x faster | ||
| * lapjvsa : ✅ Passed 🏆 4.45 x faster | ||
| * lapjvc : ✅ Passed 🐌 4.97 x slower | ||
| * lapjv : ✅ Passed 🏆 2.02 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.34 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.09 x faster | ||
| * lapjvs : ✅ Passed 🏆 3.95 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.89 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00108461s | ||
| 2. lapjvs : 0.00110318s | ||
| 3. lapjvxa : 0.00122390s | ||
| 4. lapjvx : 0.00169714s | ||
| 5. lapjv : 0.00195890s | ||
| 6. scipy ⭐ : 0.00482560s | ||
| 7. lapjvc : 0.02019986s | ||
| 1. lapjvs : 0.00116294s | ||
| 2. lapjvsa : 0.00117967s | ||
| 3. lapjvxa : 0.00148459s | ||
| 4. lapjvx : 0.00196006s | ||
| 5. lapjv : 0.00227485s | ||
| 6. scipy ⭐ : 0.00458916s | ||
| 7. lapjvc : 0.02279758s | ||
| ------------------------------- | ||
@@ -236,17 +251,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.01 x slower | ||
| * lapjv : ✅ Passed 🏆 2.0 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.05 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvc : ✅ Passed 🏆 1.46 x faster | ||
| * lapjv : ✅ Passed 🏆 1.19 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.19 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.21 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.58 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.6 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00498536s | ||
| 2. lapjvxa : 0.00499154s | ||
| 3. lapjvx : 0.00499524s | ||
| 4. lapjvs : 0.00501234s | ||
| 5. lapjv : 0.00512501s | ||
| 6. scipy ⭐ : 0.01026616s | ||
| 7. lapjvc : 0.01041151s | ||
| 1. lapjvsa : 0.00610949s | ||
| 2. lapjvs : 0.00617076s | ||
| 3. lapjvc : 0.00669765s | ||
| 4. lapjvxa : 0.00807378s | ||
| 5. lapjvx : 0.00817340s | ||
| 6. lapjv : 0.00822582s | ||
| 7. scipy ⭐ : 0.00976096s | ||
| ------------------------------- | ||
@@ -257,17 +272,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 4.17 x slower | ||
| * lapjv : ✅ Passed 🏆 3.74 x faster | ||
| * lapjvx : ✅ Passed 🏆 4.29 x faster | ||
| * lapjvxa : ✅ Passed 🏆 4.37 x faster | ||
| * lapjvs : ✅ Passed 🏆 4.33 x faster | ||
| * lapjvsa : ✅ Passed 🏆 4.38 x faster | ||
| * lapjvc : ✅ Passed 🐌 4.66 x slower | ||
| * lapjv : ✅ Passed 🏆 2.07 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.44 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.42 x faster | ||
| * lapjvs : ✅ Passed 🏆 4.27 x faster | ||
| * lapjvsa : ✅ Passed 🏆 4.37 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00136651s | ||
| 2. lapjvxa : 0.00136884s | ||
| 3. lapjvs : 0.00138280s | ||
| 4. lapjvx : 0.00139509s | ||
| 5. lapjv : 0.00160113s | ||
| 6. scipy ⭐ : 0.00598653s | ||
| 7. lapjvc : 0.02498232s | ||
| 1. lapjvsa : 0.00128856s | ||
| 2. lapjvs : 0.00131904s | ||
| 3. lapjvx : 0.00163978s | ||
| 4. lapjvxa : 0.00164817s | ||
| 5. lapjv : 0.00272247s | ||
| 6. scipy ⭐ : 0.00563737s | ||
| 7. lapjvc : 0.02625532s | ||
| ------------------------------- | ||
@@ -278,17 +293,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 222.34 x slower | ||
| * lapjv : ✅ Passed 🏆 1.15 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.31 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.29 x faster | ||
| * lapjvc : ✅ Passed 🐌 257.47 x slower | ||
| * lapjv : ✅ Passed 🏆 1.09 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.09 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.08 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.11 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.11 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.1 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvx : 0.07696005s | ||
| 2. lapjvxa : 0.07810211s | ||
| 3. lapjv : 0.08824028s | ||
| 4. scipy ⭐ : 0.10107496s | ||
| 5. lapjvsa : 0.11232857s | ||
| 6. lapjvs : 0.11252413s | ||
| 7. lapjvc : 22.47302152s | ||
| 1. lapjvx : 0.09400308s | ||
| 2. lapjv : 0.09424586s | ||
| 3. lapjvxa : 0.09509772s | ||
| 4. scipy ⭐ : 0.10258777s | ||
| 5. lapjvsa : 0.11241747s | ||
| 6. lapjvs : 0.11372150s | ||
| 7. lapjvc : 26.41295845s | ||
| ------------------------------- | ||
@@ -299,17 +314,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🏆 1.17 x faster | ||
| * lapjv : ✅ Passed 🏆 1.3 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.31 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.31 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.06 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.02 x slower | ||
| * lapjv : ✅ Passed 🐌 1.62 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.62 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.62 x slower | ||
| * lapjvs : ✅ Passed 🏆 1.76 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.76 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvs : 1.16571718s | ||
| 2. lapjvsa : 1.16708573s | ||
| 3. lapjvxa : 1.84065010s | ||
| 4. lapjvx : 1.84106529s | ||
| 5. lapjv : 1.84539000s | ||
| 6. lapjvc : 2.04553916s | ||
| 7. scipy ⭐ : 2.40261425s | ||
| 1. lapjvs : 1.34793133s | ||
| 2. lapjvsa : 1.34966543s | ||
| 3. scipy ⭐ : 2.37237136s | ||
| 4. lapjvc : 2.41397720s | ||
| 5. lapjvxa : 3.84284193s | ||
| 6. lapjvx : 3.84922083s | ||
| 7. lapjv : 3.85101395s | ||
| ------------------------------- | ||
@@ -320,17 +335,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 230.42 x slower | ||
| * lapjv : ✅ Passed 🏆 2.24 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.54 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.5 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.66 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.68 x faster | ||
| * lapjvc : ✅ Passed 🐌 273.6 x slower | ||
| * lapjv : ✅ Passed 🏆 2.03 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.04 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.05 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.59 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.63 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvx : 0.16310518s | ||
| 2. lapjvxa : 0.16545943s | ||
| 3. lapjv : 0.18474822s | ||
| 4. lapjvsa : 0.24673601s | ||
| 5. lapjvs : 0.24954140s | ||
| 6. scipy ⭐ : 0.41429755s | ||
| 7. lapjvc : 95.46102137s | ||
| 1. lapjvxa : 0.19721307s | ||
| 2. lapjvx : 0.19786040s | ||
| 3. lapjv : 0.19958704s | ||
| 4. lapjvsa : 0.24835583s | ||
| 5. lapjvs : 0.25347052s | ||
| 6. scipy ⭐ : 0.40418303s | ||
| 7. lapjvc : 110.58635478s | ||
| ------------------------------- | ||
@@ -343,3 +358,3 @@ ``` | ||
| https://github.com/rathaROG/lapx/actions/runs/18851354956/job/53788495229 | ||
| https://github.com/rathaROG/lapx/actions/runs/18961613065/job/54149890234 | ||
@@ -350,17 +365,17 @@ ``` | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.65 x slower | ||
| * lapjv : ✅ Passed 🐌 4.13 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.26 x slower | ||
| * lapjvxa : ✅ Passed 🏆 3.06 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.52 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.46 x slower | ||
| * lapjvc : ✅ Passed 🐌 1.71 x slower | ||
| * lapjv : ✅ Passed 🐌 5.14 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.32 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.57 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.57 x slower | ||
| * lapjvsa : ✅ Passed 🐌 3.25 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00001037s | ||
| 2. scipy ⭐ : 0.00003179s | ||
| 3. lapjvx : 0.00003996s | ||
| 4. lapjvsa : 0.00004642s | ||
| 5. lapjvs : 0.00004833s | ||
| 6. lapjvc : 0.00005250s | ||
| 7. lapjv : 0.00013146s | ||
| 1. scipy ⭐ : 0.00000567s | ||
| 2. lapjvxa : 0.00000888s | ||
| 3. lapjvc : 0.00000971s | ||
| 4. lapjvx : 0.00001317s | ||
| 5. lapjvsa : 0.00001842s | ||
| 6. lapjvs : 0.00002025s | ||
| 7. lapjv : 0.00002913s | ||
| ------------------------------- | ||
@@ -371,17 +386,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.51 x slower | ||
| * lapjv : ✅ Passed 🐌 1.65 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.09 x slower | ||
| * lapjvxa : ✅ Passed 🏆 1.27 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.67 x slower | ||
| * lapjvsa : ✅ Passed 🏆 1.99 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.83 x slower | ||
| * lapjv : ✅ Passed 🐌 4.14 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.83 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.59 x slower | ||
| * lapjvs : ✅ Passed 🐌 2.01 x slower | ||
| * lapjvsa : ✅ Passed 🏆 1.32 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00000308s | ||
| 2. lapjvxa : 0.00000483s | ||
| 3. scipy ⭐ : 0.00000613s | ||
| 4. lapjvx : 0.00000667s | ||
| 5. lapjvc : 0.00000925s | ||
| 6. lapjv : 0.00001013s | ||
| 7. lapjvs : 0.00001021s | ||
| 1. lapjvsa : 0.00000283s | ||
| 2. scipy ⭐ : 0.00000375s | ||
| 3. lapjvxa : 0.00000596s | ||
| 4. lapjvc : 0.00000687s | ||
| 5. lapjvx : 0.00000687s | ||
| 6. lapjvs : 0.00000754s | ||
| 7. lapjv : 0.00001554s | ||
| ------------------------------- | ||
@@ -392,17 +407,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.95 x slower | ||
| * lapjv : ✅ Passed 🐌 3.42 x slower | ||
| * lapjvx : ✅ Passed 🐌 2.1 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.68 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.23 x slower | ||
| * lapjvsa : ✅ Passed 🐌 4.63 x slower | ||
| * lapjvc : ✅ Passed 🐌 2.66 x slower | ||
| * lapjv : ✅ Passed 🐌 5.96 x slower | ||
| * lapjvx : ✅ Passed 🐌 3.03 x slower | ||
| * lapjvxa : ✅ Passed 🐌 2.6 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.77 x slower | ||
| * lapjvsa : ✅ Passed 🐌 4.37 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. scipy ⭐ : 0.00000429s | ||
| 2. lapjvxa : 0.00000721s | ||
| 3. lapjvc : 0.00000837s | ||
| 4. lapjvx : 0.00000900s | ||
| 5. lapjvs : 0.00001387s | ||
| 6. lapjv : 0.00001467s | ||
| 7. lapjvsa : 0.00001988s | ||
| 1. scipy ⭐ : 0.00000292s | ||
| 2. lapjvxa : 0.00000758s | ||
| 3. lapjvc : 0.00000775s | ||
| 4. lapjvx : 0.00000883s | ||
| 5. lapjvs : 0.00001100s | ||
| 6. lapjvsa : 0.00001275s | ||
| 7. lapjv : 0.00001738s | ||
| ------------------------------- | ||
@@ -413,17 +428,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.49 x slower | ||
| * lapjv : ✅ Passed 🏆 1.7 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.13 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.67 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.8 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.89 x faster | ||
| * lapjvc : ✅ Passed 🐌 2.19 x slower | ||
| * lapjv : ✅ Passed 🏆 1.34 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.83 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.24 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.74 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.22 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00002146s | ||
| 2. lapjvx : 0.00002683s | ||
| 3. lapjvsa : 0.00003033s | ||
| 4. lapjvs : 0.00003183s | ||
| 5. lapjv : 0.00003358s | ||
| 6. scipy ⭐ : 0.00005721s | ||
| 7. lapjvc : 0.00008517s | ||
| 1. lapjvxa : 0.00001796s | ||
| 2. lapjvx : 0.00002200s | ||
| 3. lapjvs : 0.00002308s | ||
| 4. lapjv : 0.00002992s | ||
| 5. lapjvsa : 0.00003283s | ||
| 6. scipy ⭐ : 0.00004017s | ||
| 7. lapjvc : 0.00008783s | ||
| ------------------------------- | ||
@@ -434,17 +449,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🏆 1.41 x faster | ||
| * lapjv : ✅ Passed 🏆 3.95 x faster | ||
| * lapjvx : ✅ Passed 🏆 4.4 x faster | ||
| * lapjvxa : ✅ Passed 🏆 6.29 x faster | ||
| * lapjvs : ✅ Passed 🏆 3.59 x faster | ||
| * lapjvsa : ✅ Passed 🏆 6.65 x faster | ||
| * lapjvc : ✅ Passed 🏆 1.16 x faster | ||
| * lapjv : ✅ Passed 🏆 2.13 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.69 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.29 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.35 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.41 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.00001100s | ||
| 2. lapjvxa : 0.00001162s | ||
| 3. lapjvx : 0.00001662s | ||
| 4. lapjv : 0.00001850s | ||
| 5. lapjvs : 0.00002038s | ||
| 6. lapjvc : 0.00005183s | ||
| 7. scipy ⭐ : 0.00007312s | ||
| 1. lapjvsa : 0.00002146s | ||
| 2. lapjvxa : 0.00002221s | ||
| 3. lapjvx : 0.00002721s | ||
| 4. lapjvs : 0.00003108s | ||
| 5. lapjv : 0.00003437s | ||
| 6. lapjvc : 0.00006300s | ||
| 7. scipy ⭐ : 0.00007317s | ||
| ------------------------------- | ||
@@ -455,17 +470,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 2.17 x slower | ||
| * lapjv : ✅ Passed 🏆 1.56 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.73 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.29 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.46 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.4 x faster | ||
| * lapjvc : ✅ Passed 🐌 2.31 x slower | ||
| * lapjv : ✅ Passed 🏆 1.15 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.54 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.89 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.47 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.53 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00001733s | ||
| 2. lapjvx : 0.00002292s | ||
| 3. lapjv : 0.00002546s | ||
| 4. lapjvs : 0.00002713s | ||
| 5. lapjvsa : 0.00002838s | ||
| 6. scipy ⭐ : 0.00003962s | ||
| 7. lapjvc : 0.00008579s | ||
| 1. lapjvxa : 0.00001904s | ||
| 2. lapjvx : 0.00002342s | ||
| 3. lapjvsa : 0.00002350s | ||
| 4. lapjvs : 0.00002458s | ||
| 5. lapjv : 0.00003133s | ||
| 6. scipy ⭐ : 0.00003604s | ||
| 7. lapjvc : 0.00008329s | ||
| ------------------------------- | ||
@@ -476,17 +491,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 5.08 x slower | ||
| * lapjv : ✅ Passed 🏆 1.44 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.95 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.47 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.22 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.18 x faster | ||
| * lapjvc : ✅ Passed 🐌 6.57 x slower | ||
| * lapjv : ✅ Passed 🏆 3.31 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.69 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.81 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.86 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.05 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00098750s | ||
| 2. lapjvs : 0.00154171s | ||
| 3. lapjvsa : 0.00157042s | ||
| 4. lapjvx : 0.00175387s | ||
| 5. lapjv : 0.00237538s | ||
| 6. scipy ⭐ : 0.00342258s | ||
| 7. lapjvc : 0.01738238s | ||
| 1. lapjvxa : 0.00073746s | ||
| 2. lapjvx : 0.00076150s | ||
| 3. lapjv : 0.00085017s | ||
| 4. lapjvsa : 0.00092200s | ||
| 5. lapjvs : 0.00098329s | ||
| 6. scipy ⭐ : 0.00281312s | ||
| 7. lapjvc : 0.01849563s | ||
| ------------------------------- | ||
@@ -497,17 +512,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 1.23 x slower | ||
| * lapjv : ✅ Passed 🏆 3.51 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.61 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.69 x faster | ||
| * lapjvs : ✅ Passed 🏆 3.29 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.42 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.37 x slower | ||
| * lapjv : ✅ Passed 🏆 1.09 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.11 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.18 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.06 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.04 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00192367s | ||
| 2. lapjvx : 0.00196654s | ||
| 3. lapjv : 0.00201971s | ||
| 4. lapjvsa : 0.00207583s | ||
| 5. lapjvs : 0.00215921s | ||
| 6. scipy ⭐ : 0.00709604s | ||
| 7. lapjvc : 0.00875521s | ||
| 1. lapjvxa : 0.00629000s | ||
| 2. lapjvx : 0.00669658s | ||
| 3. lapjv : 0.00680446s | ||
| 4. lapjvs : 0.00702367s | ||
| 5. lapjvsa : 0.00716958s | ||
| 6. scipy ⭐ : 0.00744075s | ||
| 7. lapjvc : 0.01022246s | ||
| ------------------------------- | ||
@@ -518,17 +533,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 5.09 x slower | ||
| * lapjv : ✅ Passed 🏆 3.0 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.6 x faster | ||
| * lapjvxa : ✅ Passed 🏆 3.71 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.83 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.74 x faster | ||
| * lapjvc : ✅ Passed 🐌 5.87 x slower | ||
| * lapjv : ✅ Passed 🏆 2.05 x faster | ||
| * lapjvx : ✅ Passed 🏆 3.86 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.83 x faster | ||
| * lapjvs : ✅ Passed 🏆 3.29 x faster | ||
| * lapjvsa : ✅ Passed 🏆 3.35 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.00110967s | ||
| 2. lapjvx : 0.00114104s | ||
| 3. lapjv : 0.00137046s | ||
| 4. lapjvs : 0.00145308s | ||
| 5. lapjvsa : 0.00150308s | ||
| 6. scipy ⭐ : 0.00411283s | ||
| 7. lapjvc : 0.02093913s | ||
| 1. lapjvx : 0.00102792s | ||
| 2. lapjvsa : 0.00118358s | ||
| 3. lapjvs : 0.00120550s | ||
| 4. lapjvxa : 0.00140042s | ||
| 5. lapjv : 0.00192958s | ||
| 6. scipy ⭐ : 0.00396425s | ||
| 7. lapjvc : 0.02326779s | ||
| ------------------------------- | ||
@@ -539,17 +554,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 199.7 x slower | ||
| * lapjv : ✅ Passed 🐌 1.48 x slower | ||
| * lapjvx : ✅ Passed 🏆 1.17 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.08 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.75 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.58 x slower | ||
| * lapjvc : ✅ Passed 🐌 349.67 x slower | ||
| * lapjv : ✅ Passed 🐌 3.52 x slower | ||
| * lapjvx : ✅ Passed 🐌 1.43 x slower | ||
| * lapjvxa : ✅ Passed 🐌 1.45 x slower | ||
| * lapjvs : ✅ Passed 🐌 3.18 x slower | ||
| * lapjvsa : ✅ Passed 🐌 2.84 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvx : 0.09995704s | ||
| 2. lapjvxa : 0.10858558s | ||
| 3. scipy ⭐ : 0.11726183s | ||
| 4. lapjv : 0.17386667s | ||
| 5. lapjvsa : 0.18564625s | ||
| 6. lapjvs : 0.20479308s | ||
| 7. lapjvc : 23.41745225s | ||
| 1. scipy ⭐ : 0.07873675s | ||
| 2. lapjvx : 0.11293429s | ||
| 3. lapjvxa : 0.11401713s | ||
| 4. lapjvsa : 0.22370100s | ||
| 5. lapjvs : 0.25039458s | ||
| 6. lapjv : 0.27737229s | ||
| 7. lapjvc : 27.53160242s | ||
| ------------------------------- | ||
@@ -560,17 +575,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 2.01 x slower | ||
| * lapjv : ✅ Passed 🏆 1.21 x faster | ||
| * lapjvx : ✅ Passed 🏆 1.25 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.19 x faster | ||
| * lapjvs : ✅ Passed 🏆 1.73 x faster | ||
| * lapjvsa : ✅ Passed 🏆 1.84 x faster | ||
| * lapjvc : ✅ Passed 🐌 1.35 x slower | ||
| * lapjv : ✅ Passed 🏆 2.36 x faster | ||
| * lapjvx : ✅ Passed 🏆 2.15 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.23 x faster | ||
| * lapjvs : ✅ Passed 🏆 2.52 x faster | ||
| * lapjvsa : ✅ Passed 🏆 2.89 x faster | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvsa : 0.92814183s | ||
| 2. lapjvs : 0.98585971s | ||
| 3. lapjvx : 1.36841413s | ||
| 4. lapjv : 1.41392200s | ||
| 5. lapjvxa : 1.43138329s | ||
| 6. scipy ⭐ : 1.70584088s | ||
| 7. lapjvc : 3.43550417s | ||
| 1. lapjvsa : 0.90473763s | ||
| 2. lapjvs : 1.03581571s | ||
| 3. lapjv : 1.10758192s | ||
| 4. lapjvxa : 1.17104621s | ||
| 5. lapjvx : 1.21566396s | ||
| 6. scipy ⭐ : 2.61083117s | ||
| 7. lapjvc : 3.53453567s | ||
| ------------------------------- | ||
@@ -581,17 +596,17 @@ | ||
| ----------------------------------------- | ||
| * lapjvc : ✅ Passed 🐌 283.01 x slower | ||
| * lapjv : ✅ Passed 🐌 2.24 x slower | ||
| * lapjvx : ✅ Passed 🏆 1.39 x faster | ||
| * lapjvxa : ✅ Passed 🏆 2.27 x faster | ||
| * lapjvs : ✅ Passed 🐌 1.5 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.79 x slower | ||
| * lapjvc : ✅ Passed 🐌 308.42 x slower | ||
| * lapjv : ✅ Passed 🐌 2.1 x slower | ||
| * lapjvx : ✅ Passed 🏆 1.32 x faster | ||
| * lapjvxa : ✅ Passed 🏆 1.87 x faster | ||
| * lapjvs : ✅ Passed 🐌 2.3 x slower | ||
| * lapjvsa : ✅ Passed 🐌 1.98 x slower | ||
| ----- 🎉 SPEED RANKING 🎉 ----- | ||
| 1. lapjvxa : 0.16666625s | ||
| 2. lapjvx : 0.27308525s | ||
| 3. scipy ⭐ : 0.37829733s | ||
| 4. lapjvs : 0.56853200s | ||
| 5. lapjvsa : 0.67584400s | ||
| 6. lapjv : 0.84921917s | ||
| 7. lapjvc : 107.06137171s | ||
| 1. lapjvxa : 0.18422821s | ||
| 2. lapjvx : 0.26109171s | ||
| 3. scipy ⭐ : 0.34502992s | ||
| 4. lapjvsa : 0.68365575s | ||
| 5. lapjv : 0.72371579s | ||
| 6. lapjvs : 0.79274062s | ||
| 7. lapjvc : 106.41484879s | ||
| ------------------------------- | ||
@@ -602,3 +617,3 @@ ``` | ||
| 👁️ See newer benchmark results on all platforms [here on GitHub](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml). | ||
| 👁️ See newer benchmark results on all platforms [here on GitHub](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml). | ||
@@ -609,3 +624,3 @@ ## 🕵️♂️ Other Benchmarks | ||
| This [benchmark_tracking.py](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_tracking.py) is specifically designed for Object Tracking applications, with [SciPy](https://pypi.org/project/scipy/) as the baseline. | ||
| This [benchmark_tracking.py](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_tracking.py) is specifically designed for ***Object Tracking*** applications, with [SciPy](https://pypi.org/project/scipy/) as the baseline. | ||
@@ -616,7 +631,7 @@ ``` | ||
| git clone https://github.com/rathaROG/lapx.git | ||
| cd lapx/.github/test | ||
| cd lapx/benchmarks | ||
| python benchmark_tracking.py | ||
| ``` | ||
| As shown in the updated benchmark results below, the new function [`lapjvx()`](https://github.com/rathaROG/lapx#2-the-new-function-lapjvx) (LAPX LAPJVX in the tables) and the original [`lapjv()`](https://github.com/rathaROG/lapx#1-the-original-function-lapjv) (LAPX LAPJV in the tables) consistently match the baseline outputs of SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html), as indicated by “✓” and ✅ in the tables. | ||
| As shown in the benchmark results below, the new function [`lapjvx()`](https://github.com/rathaROG/lapx#2-the-new-function-lapjvx) (LAPX LAPJVX in the tables) and the original [`lapjv()`](https://github.com/rathaROG/lapx#1-the-original-function-lapjv) (LAPX LAPJV in the tables) consistently match the baseline outputs of SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html), as indicated by “✓” and ✅ in the tables. | ||
@@ -627,9 +642,19 @@ In most scenarios, `lapjvx()` and `lapjv()` demonstrate faster performance than the baseline SciPy's `linear_sum_assignment`, and they remain competitive with other LAPX variants such as [`lapjvc`](https://github.com/rathaROG/lapx#4-the-new-function-lapjvc) (LAPX LAPJVC in the tables). When in-function filtering with `cost_limit` is used, `lapjv()` (LAPX LAPJV-IFT in the tables) experiences a significant performance impact and can produce different outputs compared to SciPy's baseline, as indicated by “✗” and ⚠️ in the tables. | ||
| 💡 To achieve optimal performance of `lapjvx()` or `lapjv()` in object tracking applications, follow the implementation in the current [`benchmark_tracking.py`](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_tracking.py) script. | ||
| 💡 To achieve optimal performance of `lapjvx()` or `lapjv()` in object tracking applications, follow the implementation in the current [`benchmark_tracking.py`](https://github.com/rathaROG/lapx/blob/main/benchmarks/benchmark_tracking.py) script. | ||
| <details><summary>📊 Show the results:</summary> | ||
| <details><summary>📊 Show the results:</summary><br> | ||
| https://github.com/rathaROG/lapx/actions/runs/18830580672/job/53721233510 | ||
| Run on my local Windows 11 i9-13900KS (8 p-core + 8 e-core) + python 3.11.9 | ||
| ``` | ||
| numpy==2.2.6 | ||
| scipy==1.16.3 | ||
| lapx @ git+https://github.com/rathaROG/lapx.git@ca0bbee8e319fe005c557d5a2bcce1148d89797c | ||
| ``` | ||
| ``` | ||
| Microsoft Windows [Version 10.0.26200.7019] | ||
| (c) Microsoft Corporation. All rights reserved. | ||
| D:\DEV\lapx_all\tmp\lapx\benchmarks>python benchmark_tracking.py | ||
| ################################################################# | ||
@@ -640,13 +665,13 @@ # Benchmark with threshold (cost_limit) = 0.05 | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000325s 6th | 0.000137s ✗ 1st | 0.000168s ✓ 3rd | 0.000177s ✓ 4th | 0.000206s ✓ 5th | 0.000162s ✓ 2nd | ||
| 25x20 | 0.000170s 5th | 0.000191s ✗ 6th | 0.000155s ✓ 1st | 0.000170s ✓ 4th | 0.000162s ✓ 3rd | 0.000160s ✓ 2nd | ||
| 50x50 | 0.000265s 6th | 0.000214s ✗ 4th | 0.000190s ✓ 1st | 0.000193s ✓ 2nd | 0.000246s ✓ 5th | 0.000194s ✓ 3rd | ||
| 100x150 | 0.000453s 4th | 0.001335s ✓ 6th | 0.000402s ✓ 3rd | 0.000396s ✓ 2nd | 0.001067s ✓ 5th | 0.000330s ✓ 1st | ||
| 250x250 | 0.002854s 5th | 0.002952s ✓ 6th | 0.001731s ✓ 3rd | 0.001559s ✓ 2nd | 0.001977s ✓ 4th | 0.001536s ✓ 1st | ||
| 550x500 | 0.008365s 1st | 0.064973s ✓ 6th | 0.012927s ✓ 4th | 0.011949s ✓ 3rd | 0.030009s ✓ 5th | 0.011664s ✓ 2nd | ||
| 1000x1000 | 0.051245s 2nd | 0.111529s ✓ 6th | 0.057231s ✓ 5th | 0.055981s ✓ 4th | 0.044361s ✓ 1st | 0.055155s ✓ 3rd | ||
| 2000x2500 | 0.075957s 4th | 3.645535s ✓ 6th | 0.020558s ✓ 1st | 0.020706s ✓ 2nd | 2.975010s ✓ 5th | 0.034655s ✓ 3rd | ||
| 5000x5000 | 2.114572s 5th | 2.563577s ✓ 6th | 1.214149s ✓ 2nd | 1.219787s ✓ 3rd | 1.844269s ✓ 4th | 0.959447s ✓ 1st | ||
| 10x10 | 0.000063s 4th | 0.000057s ✓ 2nd | 0.000063s ✓ 5th | 0.000069s ✓ 6th | 0.000061s ✓ 3rd | 0.000057s ✓ 1st | ||
| 25x20 | 0.000058s 4th | 0.000104s ✗ 6th | 0.000062s ✓ 5th | 0.000052s ✓ 2nd | 0.000058s ✓ 3rd | 0.000051s ✓ 1st | ||
| 50x50 | 0.000083s 4th | 0.000086s ✗ 5th | 0.000068s ✓ 3rd | 0.000058s ✓ 1st | 0.000103s ✓ 6th | 0.000063s ✓ 2nd | ||
| 100x150 | 0.000131s 2nd | 0.000828s ✗ 6th | 0.000132s ✓ 3rd | 0.000144s ✓ 4th | 0.000680s ✓ 5th | 0.000123s ✓ 1st | ||
| 250x250 | 0.001126s 4th | 0.001218s ✓ 5th | 0.000557s ✓ 2nd | 0.000537s ✓ 1st | 0.001516s ✓ 6th | 0.000605s ✓ 3rd | ||
| 550x500 | 0.003531s 4th | 0.011714s ✓ 5th | 0.001424s ✓ 2nd | 0.001358s ✓ 1st | 0.017545s ✓ 6th | 0.001511s ✓ 3rd | ||
| 1000x1000 | 0.022934s 4th | 0.026359s ✓ 5th | 0.010415s ✓ 2nd | 0.010320s ✓ 1st | 0.031669s ✓ 6th | 0.012068s ✓ 3rd | ||
| 2000x2500 | 0.034198s 4th | 1.627013s ✓ 6th | 0.013647s ✓ 1st | 0.015660s ✓ 2nd | 1.531048s ✓ 5th | 0.022275s ✓ 3rd | ||
| 5000x5000 | 1.095034s 3rd | 2.335637s ✓ 6th | 1.082954s ✓ 2nd | 1.103870s ✓ 4th | 1.140890s ✓ 5th | 0.496765s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -656,10 +681,10 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1063.3028 ms | ✅ | 🥇x3 🥈x3 🥉x3 | ||
| 2. LAPX LAPJV : 1307.5116 ms | ✅ | 🥇x3 🥈x1 🥉x3 🚩x1 🏳️x1 | ||
| 3. LAPX LAPJVX : 1310.9174 ms | ✅ | 🥈x4 🥉x2 🚩x3 | ||
| 4. BASELINE SciPy : 2254.2068 ms | ⭐ | 🥇x1 🥈x1 🚩x2 🏳️x3 🥴x2 | ||
| 5. LAPX LAPJVC : 4897.3061 ms | ✅ | 🥇x1 🥉x1 🚩x2 🏳️x5 | ||
| 6. LAPX LAPJV-IFT : 6390.4416 ms | ⚠️ | 🥇x1 🚩x1 🥴x7 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 533.5186 ms | ✅ | 🥇x4 🥈x1 🥉x4 | ||
| 2. LAPX LAPJV : 1109.3228 ms | ✅ | 🥇x1 🥈x4 🥉x2 🏳️x2 | ||
| 3. LAPX LAPJVX : 1132.0674 ms | ✅ | 🥇x4 🥈x2 🚩x2 🥴x1 | ||
| 4. BASELINE SciPy : 1157.1577 ms | ⭐ | 🥈x1 🥉x1 🚩x7 | ||
| 5. LAPX LAPJVC : 2723.5708 ms | ✅ | 🥉x2 🏳️x3 🥴x4 | ||
| 6. LAPX LAPJV-IFT : 4003.0145 ms | ⚠️ | 🥈x1 🏳️x4 🥴x4 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
@@ -672,13 +697,13 @@ | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000188s 6th | 0.000157s ✗ 5th | 0.000130s ✓ 3rd | 0.000129s ✓ 1st | 0.000139s ✓ 4th | 0.000129s ✓ 2nd | ||
| 25x20 | 0.000152s 3rd | 0.000171s ✗ 6th | 0.000148s ✓ 1st | 0.000149s ✓ 2nd | 0.000159s ✓ 4th | 0.000160s ✓ 5th | ||
| 50x50 | 0.000245s 6th | 0.000230s ✗ 4th | 0.000188s ✓ 1st | 0.000194s ✓ 2nd | 0.000231s ✓ 5th | 0.000197s ✓ 3rd | ||
| 100x150 | 0.000417s 4th | 0.001254s ✓ 6th | 0.000334s ✓ 2nd | 0.000333s ✓ 1st | 0.000887s ✓ 5th | 0.000349s ✓ 3rd | ||
| 250x250 | 0.002642s 5th | 0.003365s ✓ 6th | 0.001734s ✓ 2nd | 0.001751s ✓ 3rd | 0.002294s ✓ 4th | 0.001708s ✓ 1st | ||
| 550x500 | 0.007055s 1st | 0.127557s ✓ 6th | 0.011708s ✓ 4th | 0.011700s ✓ 3rd | 0.040566s ✓ 5th | 0.011671s ✓ 2nd | ||
| 1000x1000 | 0.045616s 5th | 0.085374s ✓ 6th | 0.041577s ✓ 3rd | 0.041732s ✓ 4th | 0.040828s ✓ 1st | 0.041053s ✓ 2nd | ||
| 2000x2500 | 0.075874s 4th | 3.594363s ✓ 6th | 0.020592s ✓ 2nd | 0.020426s ✓ 1st | 2.840181s ✓ 5th | 0.024470s ✓ 3rd | ||
| 5000x5000 | 2.493812s 5th | 3.415118s ✓ 6th | 1.651438s ✓ 3rd | 1.646662s ✓ 2nd | 2.010495s ✓ 4th | 0.994217s ✓ 1st | ||
| 10x10 | 0.000048s 2nd | 0.000055s ✗ 5th | 0.000048s ✓ 3rd | 0.000039s ✓ 1st | 0.000054s ✓ 4th | 0.000055s ✓ 6th | ||
| 25x20 | 0.000048s 3rd | 0.000058s ✗ 6th | 0.000055s ✓ 4th | 0.000047s ✓ 2nd | 0.000055s ✓ 5th | 0.000047s ✓ 1st | ||
| 50x50 | 0.000077s 4th | 0.000080s ✗ 6th | 0.000057s ✓ 3rd | 0.000048s ✓ 1st | 0.000078s ✓ 5th | 0.000051s ✓ 2nd | ||
| 100x150 | 0.000112s 3rd | 0.000635s ✓ 6th | 0.000123s ✓ 4th | 0.000092s ✓ 1st | 0.000588s ✓ 5th | 0.000093s ✓ 2nd | ||
| 250x250 | 0.000991s 4th | 0.001352s ✓ 6th | 0.000536s ✓ 1st | 0.000536s ✓ 2nd | 0.001200s ✓ 5th | 0.000591s ✓ 3rd | ||
| 550x500 | 0.003480s 4th | 0.010844s ✓ 5th | 0.001426s ✓ 2nd | 0.001311s ✓ 1st | 0.016003s ✓ 6th | 0.001447s ✓ 3rd | ||
| 1000x1000 | 0.023240s 4th | 0.026984s ✓ 5th | 0.009923s ✓ 2nd | 0.009682s ✓ 1st | 0.027498s ✓ 6th | 0.011329s ✓ 3rd | ||
| 2000x2500 | 0.034578s 4th | 1.563681s ✓ 5th | 0.014135s ✓ 2nd | 0.014121s ✓ 1st | 1.596397s ✓ 6th | 0.022706s ✓ 3rd | ||
| 5000x5000 | 1.070328s 2nd | 3.315799s ✓ 6th | 1.622128s ✓ 4th | 1.628149s ✓ 5th | 1.100956s ✓ 3rd | 0.537018s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -688,10 +713,10 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1073.9525 ms | ✅ | 🥇x2 🥈x3 🥉x3 🏳️x1 | ||
| 2. LAPX LAPJVX : 1723.0752 ms | ✅ | 🥇x3 🥈x3 🥉x2 🚩x1 | ||
| 3. LAPX LAPJV : 1727.8499 ms | ✅ | 🥇x2 🥈x3 🥉x3 🚩x1 | ||
| 4. BASELINE SciPy : 2626.0008 ms | ⭐ | 🥇x1 🥉x1 🚩x2 🏳️x3 🥴x2 | ||
| 5. LAPX LAPJVC : 4935.7815 ms | ✅ | 🥇x1 🚩x4 🏳️x4 | ||
| 6. LAPX LAPJV-IFT : 7227.5912 ms | ⚠️ | 🚩x1 🏳️x1 🥴x7 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 573.3374 ms | ✅ | 🥇x2 🥈x2 🥉x4 🥴x1 | ||
| 2. BASELINE SciPy : 1132.9018 ms | ⭐ | 🥈x2 🥉x2 🚩x5 | ||
| 3. LAPX LAPJV : 1648.4320 ms | ✅ | 🥇x1 🥈x3 🥉x2 🚩x3 | ||
| 4. LAPX LAPJVX : 1654.0251 ms | ✅ | 🥇x6 🥈x2 🏳️x1 | ||
| 5. LAPX LAPJVC : 2742.8296 ms | ✅ | 🥉x1 🚩x1 🏳️x4 🥴x3 | ||
| 6. LAPX LAPJV-IFT : 4919.4888 ms | ⚠️ | 🏳️x4 🥴x5 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
@@ -704,13 +729,13 @@ | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000218s 6th | 0.000129s ✓ 1st | 0.000131s ✓ 3rd | 0.000154s ✓ 5th | 0.000139s ✓ 4th | 0.000131s ✓ 2nd | ||
| 25x20 | 0.000150s 1st | 0.000178s ✓ 6th | 0.000151s ✓ 2nd | 0.000158s ✓ 4th | 0.000163s ✓ 5th | 0.000155s ✓ 3rd | ||
| 50x50 | 0.000211s 5th | 0.000194s ✓ 3rd | 0.000283s ✓ 6th | 0.000180s ✓ 1st | 0.000194s ✓ 2nd | 0.000197s ✓ 4th | ||
| 100x150 | 0.000404s 4th | 0.001266s ✓ 6th | 0.000344s ✓ 3rd | 0.000318s ✓ 1st | 0.000955s ✓ 5th | 0.000341s ✓ 2nd | ||
| 250x250 | 0.002778s 6th | 0.002573s ✓ 5th | 0.001292s ✓ 2nd | 0.001324s ✓ 3rd | 0.001683s ✓ 4th | 0.001267s ✓ 1st | ||
| 550x500 | 0.007734s 1st | 0.238647s ✓ 6th | 0.012039s ✓ 4th | 0.011927s ✓ 3rd | 0.040219s ✓ 5th | 0.011922s ✓ 2nd | ||
| 1000x1000 | 0.046291s 5th | 0.075969s ✓ 6th | 0.036951s ✓ 2nd | 0.037461s ✓ 3rd | 0.039884s ✓ 4th | 0.020911s ✓ 1st | ||
| 2000x2500 | 0.076470s 4th | 3.556511s ✓ 6th | 0.020127s ✓ 1st | 0.020713s ✓ 2nd | 2.866433s ✓ 5th | 0.023518s ✓ 3rd | ||
| 5000x5000 | 2.853023s 5th | 2.870481s ✓ 6th | 1.372504s ✓ 3rd | 1.367633s ✓ 2nd | 1.949158s ✓ 4th | 1.205538s ✓ 1st | ||
| 10x10 | 0.000051s 6th | 0.000045s ✓ 5th | 0.000045s ✓ 4th | 0.000040s ✓ 2nd | 0.000043s ✓ 3rd | 0.000039s ✓ 1st | ||
| 25x20 | 0.000043s 1st | 0.000055s ✓ 6th | 0.000054s ✓ 4th | 0.000046s ✓ 2nd | 0.000054s ✓ 5th | 0.000046s ✓ 3rd | ||
| 50x50 | 0.000070s 4th | 0.000076s ✓ 5th | 0.000060s ✓ 3rd | 0.000049s ✓ 1st | 0.000089s ✓ 6th | 0.000054s ✓ 2nd | ||
| 100x150 | 0.000113s 4th | 0.000646s ✓ 6th | 0.000103s ✓ 3rd | 0.000095s ✓ 2nd | 0.000616s ✓ 5th | 0.000095s ✓ 1st | ||
| 250x250 | 0.001064s 4th | 0.001522s ✓ 6th | 0.000643s ✓ 2nd | 0.000591s ✓ 1st | 0.001448s ✓ 5th | 0.000673s ✓ 3rd | ||
| 550x500 | 0.003672s 4th | 0.010797s ✓ 5th | 0.001429s ✓ 2nd | 0.001405s ✓ 1st | 0.015196s ✓ 6th | 0.001497s ✓ 3rd | ||
| 1000x1000 | 0.019571s 4th | 0.027457s ✓ 6th | 0.010368s ✓ 1st | 0.011375s ✓ 3rd | 0.024061s ✓ 5th | 0.010495s ✓ 2nd | ||
| 2000x2500 | 0.038530s 4th | 1.654156s ✓ 6th | 0.015500s ✓ 2nd | 0.014464s ✓ 1st | 1.561805s ✓ 5th | 0.022967s ✓ 3rd | ||
| 5000x5000 | 0.969325s 5th | 1.507703s ✓ 6th | 0.668259s ✓ 3rd | 0.656468s ✓ 2nd | 0.954102s ✓ 4th | 0.475278s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -720,10 +745,10 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1263.9814 ms | ✅ | 🥇x3 🥈x3 🥉x2 🚩x1 | ||
| 2. LAPX LAPJVX : 1439.8675 ms | ✅ | 🥇x2 🥈x2 🥉x3 🚩x1 🏳️x1 | ||
| 3. LAPX LAPJV : 1443.8227 ms | ✅ | 🥇x1 🥈x3 🥉x3 🚩x1 🥴x1 | ||
| 4. BASELINE SciPy : 2987.2792 ms | ⭐ | 🥇x2 🚩x2 🏳️x3 🥴x2 | ||
| 5. LAPX LAPJVC : 4898.8268 ms | ✅ | 🥈x1 🚩x4 🏳️x4 | ||
| 6. LAPX LAPJV-IFT : 6745.9482 ms | ✅ | 🥇x1 🥉x1 🏳️x1 🥴x6 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 511.1457 ms | ✅ | 🥇x3 🥈x2 🥉x4 | ||
| 2. LAPX LAPJVX : 684.5318 ms | ✅ | 🥇x4 🥈x4 🥉x1 | ||
| 3. LAPX LAPJV : 696.4601 ms | ✅ | 🥇x1 🥈x3 🥉x3 🚩x2 | ||
| 4. BASELINE SciPy : 1032.4388 ms | ⭐ | 🥇x1 🚩x6 🏳️x1 🥴x1 | ||
| 5. LAPX LAPJVC : 2557.4136 ms | ✅ | 🥉x1 🚩x1 🏳️x5 🥴x2 | ||
| 6. LAPX LAPJV-IFT : 3202.4579 ms | ✅ | 🏳️x3 🥴x6 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
@@ -736,13 +761,13 @@ | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000233s 6th | 0.000127s ✓ 1st | 0.000158s ✓ 5th | 0.000128s ✓ 2nd | 0.000142s ✓ 4th | 0.000129s ✓ 3rd | ||
| 25x20 | 0.000144s 1st | 0.000165s ✓ 5th | 0.000172s ✓ 6th | 0.000159s ✓ 4th | 0.000155s ✓ 3rd | 0.000148s ✓ 2nd | ||
| 50x50 | 0.000269s 6th | 0.000203s ✓ 3rd | 0.000184s ✓ 1st | 0.000191s ✓ 2nd | 0.000223s ✓ 4th | 0.000254s ✓ 5th | ||
| 100x150 | 0.000417s 4th | 0.001233s ✓ 6th | 0.000308s ✓ 1st | 0.000356s ✓ 3rd | 0.001072s ✓ 5th | 0.000326s ✓ 2nd | ||
| 250x250 | 0.002866s 5th | 0.003220s ✓ 6th | 0.001664s ✓ 1st | 0.001702s ✓ 3rd | 0.002252s ✓ 4th | 0.001676s ✓ 2nd | ||
| 550x500 | 0.008314s 1st | 0.249585s ✓ 6th | 0.011429s ✓ 2nd | 0.011442s ✓ 3rd | 0.030332s ✓ 5th | 0.011470s ✓ 4th | ||
| 1000x1000 | 0.046902s 5th | 0.080581s ✓ 6th | 0.039630s ✓ 2nd | 0.039960s ✓ 3rd | 0.045573s ✓ 4th | 0.038703s ✓ 1st | ||
| 2000x2500 | 0.074322s 4th | 3.490300s ✓ 6th | 0.020954s ✓ 1st | 0.021199s ✓ 2nd | 2.761126s ✓ 5th | 0.024095s ✓ 3rd | ||
| 5000x5000 | 2.614220s 5th | 4.553976s ✓ 6th | 2.238387s ✓ 3rd | 2.240301s ✓ 4th | 1.912648s ✓ 2nd | 1.146587s ✓ 1st | ||
| 10x10 | 0.000049s 6th | 0.000046s ✓ 4th | 0.000046s ✓ 5th | 0.000038s ✓ 1st | 0.000044s ✓ 3rd | 0.000041s ✓ 2nd | ||
| 25x20 | 0.000040s 1st | 0.000055s ✓ 6th | 0.000052s ✓ 5th | 0.000043s ✓ 2nd | 0.000051s ✓ 4th | 0.000045s ✓ 3rd | ||
| 50x50 | 0.000067s 4th | 0.000074s ✓ 5th | 0.000058s ✓ 3rd | 0.000053s ✓ 2nd | 0.000081s ✓ 6th | 0.000053s ✓ 1st | ||
| 100x150 | 0.000117s 2nd | 0.000752s ✓ 6th | 0.000123s ✓ 3rd | 0.000126s ✓ 4th | 0.000721s ✓ 5th | 0.000098s ✓ 1st | ||
| 250x250 | 0.001063s 4th | 0.001545s ✓ 6th | 0.000447s ✓ 2nd | 0.000445s ✓ 1st | 0.001303s ✓ 5th | 0.000477s ✓ 3rd | ||
| 550x500 | 0.003711s 4th | 0.011309s ✓ 5th | 0.001524s ✓ 2nd | 0.001460s ✓ 1st | 0.016480s ✓ 6th | 0.001558s ✓ 3rd | ||
| 1000x1000 | 0.019167s 1st | 0.053561s ✓ 6th | 0.025616s ✓ 3rd | 0.025778s ✓ 4th | 0.023447s ✓ 2nd | 0.027353s ✓ 5th | ||
| 2000x2500 | 0.035676s 4th | 1.579856s ✓ 5th | 0.014502s ✓ 2nd | 0.014438s ✓ 1st | 1.699035s ✓ 6th | 0.023144s ✓ 3rd | ||
| 5000x5000 | 1.214213s 5th | 1.230595s ✓ 6th | 0.511229s ✓ 2nd | 0.514490s ✓ 3rd | 1.144982s ✓ 4th | 0.452692s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -752,10 +777,10 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1223.3865 ms | ✅ | 🥇x2 🥈x3 🥉x2 🚩x1 🏳️x1 | ||
| 2. LAPX LAPJV : 2312.8874 ms | ✅ | 🥇x4 🥈x2 🥉x1 🏳️x1 🥴x1 | ||
| 3. LAPX LAPJVX : 2315.4386 ms | ✅ | 🥈x3 🥉x4 🚩x2 | ||
| 4. BASELINE SciPy : 2747.6856 ms | ⭐ | 🥇x2 🚩x2 🏳️x3 🥴x2 | ||
| 5. LAPX LAPJVC : 4753.5226 ms | ✅ | 🥈x1 🥉x1 🚩x4 🏳️x3 | ||
| 6. LAPX LAPJV-IFT : 8379.3885 ms | ✅ | 🥇x1 🥉x1 🏳️x1 🥴x6 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 505.4603 ms | ✅ | 🥇x3 🥈x1 🥉x4 🏳️x1 | ||
| 2. LAPX LAPJV : 553.5970 ms | ✅ | 🥈x4 🥉x3 🏳️x2 | ||
| 3. LAPX LAPJVX : 556.8710 ms | ✅ | 🥇x4 🥈x2 🥉x1 🚩x2 | ||
| 4. BASELINE SciPy : 1274.1026 ms | ⭐ | 🥇x2 🥈x1 🚩x4 🏳️x1 🥴x1 | ||
| 5. LAPX LAPJV-IFT : 2877.7913 ms | ✅ | 🚩x1 🏳️x3 🥴x5 | ||
| 6. LAPX LAPJVC : 2886.1434 ms | ✅ | 🥈x1 🥉x1 🚩x2 🏳️x2 🥴x3 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
@@ -768,13 +793,13 @@ | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000210s 6th | 0.000136s ✓ 4th | 0.000133s ✓ 3rd | 0.000127s ✓ 1st | 0.000142s ✓ 5th | 0.000131s ✓ 2nd | ||
| 25x20 | 0.000150s 3rd | 0.000203s ✓ 6th | 0.000145s ✓ 1st | 0.000148s ✓ 2nd | 0.000152s ✓ 4th | 0.000178s ✓ 5th | ||
| 50x50 | 0.000243s 6th | 0.000239s ✓ 5th | 0.000194s ✓ 1st | 0.000201s ✓ 2nd | 0.000233s ✓ 4th | 0.000203s ✓ 3rd | ||
| 100x150 | 0.000426s 4th | 0.001229s ✓ 6th | 0.000335s ✓ 3rd | 0.000329s ✓ 2nd | 0.001090s ✓ 5th | 0.000311s ✓ 1st | ||
| 250x250 | 0.002309s 6th | 0.002270s ✓ 5th | 0.001100s ✓ 1st | 0.001198s ✓ 3rd | 0.001776s ✓ 4th | 0.001157s ✓ 2nd | ||
| 550x500 | 0.007958s 1st | 0.236500s ✓ 6th | 0.012768s ✓ 3rd | 0.012651s ✓ 2nd | 0.039369s ✓ 5th | 0.012789s ✓ 4th | ||
| 1000x1000 | 0.047147s 5th | 0.089055s ✓ 6th | 0.044446s ✓ 4th | 0.044309s ✓ 3rd | 0.043942s ✓ 2nd | 0.043357s ✓ 1st | ||
| 2000x2500 | 0.078020s 4th | 3.430561s ✓ 6th | 0.021284s ✓ 1st | 0.021622s ✓ 2nd | 2.844436s ✓ 5th | 0.024936s ✓ 3rd | ||
| 5000x5000 | 2.490763s 5th | 4.480608s ✓ 6th | 2.195783s ✓ 3rd | 2.198696s ✓ 4th | 2.009281s ✓ 2nd | 1.052344s ✓ 1st | ||
| 10x10 | 0.000055s 6th | 0.000048s ✓ 5th | 0.000046s ✓ 4th | 0.000037s ✓ 1st | 0.000043s ✓ 3rd | 0.000040s ✓ 2nd | ||
| 25x20 | 0.000043s 1st | 0.000059s ✓ 6th | 0.000053s ✓ 4th | 0.000045s ✓ 2nd | 0.000055s ✓ 5th | 0.000046s ✓ 3rd | ||
| 50x50 | 0.000074s 4th | 0.000080s ✓ 5th | 0.000063s ✓ 3rd | 0.000054s ✓ 1st | 0.000088s ✓ 6th | 0.000058s ✓ 2nd | ||
| 100x150 | 0.000146s 4th | 0.000647s ✓ 5th | 0.000107s ✓ 3rd | 0.000095s ✓ 1st | 0.000714s ✓ 6th | 0.000103s ✓ 2nd | ||
| 250x250 | 0.000964s 4th | 0.001495s ✓ 6th | 0.000565s ✓ 1st | 0.000603s ✓ 2nd | 0.001220s ✓ 5th | 0.000636s ✓ 3rd | ||
| 550x500 | 0.003138s 4th | 0.010879s ✓ 5th | 0.001294s ✓ 1st | 0.001329s ✓ 2nd | 0.016092s ✓ 6th | 0.001405s ✓ 3rd | ||
| 1000x1000 | 0.020857s 3rd | 0.042133s ✓ 6th | 0.019502s ✓ 2nd | 0.019448s ✓ 1st | 0.023370s ✓ 5th | 0.021119s ✓ 4th | ||
| 2000x2500 | 0.032293s 4th | 1.575432s ✓ 6th | 0.014037s ✓ 1st | 0.014037s ✓ 2nd | 1.482075s ✓ 5th | 0.022823s ✓ 3rd | ||
| 5000x5000 | 0.974974s 4th | 1.340142s ✓ 6th | 0.564158s ✓ 2nd | 0.570803s ✓ 3rd | 1.116583s ✓ 5th | 0.442339s ✓ 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
@@ -784,14 +809,14 @@ | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 1135.4053 ms | ✅ | 🥇x3 🥈x2 🥉x2 🚩x1 🏳️x1 | ||
| 2. LAPX LAPJV : 2276.1874 ms | ✅ | 🥇x4 🥉x4 🚩x1 | ||
| 3. LAPX LAPJVX : 2279.2815 ms | ✅ | 🥇x1 🥈x5 🥉x2 🚩x1 | ||
| 4. BASELINE SciPy : 2627.2261 ms | ⭐ | 🥇x1 🥉x1 🚩x2 🏳️x2 🥴x3 | ||
| 5. LAPX LAPJVC : 4940.4219 ms | ✅ | 🥈x2 🚩x3 🏳️x4 | ||
| 6. LAPX LAPJV-IFT : 8240.8002 ms | ✅ | 🚩x1 🏳️x2 🥴x6 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| 🎉 --------------------------- OVERALL RANKING --------------------------- 🎉 | ||
| 1. LAPX LAPJVS : 488.5671 ms | ✅ | 🥇x1 🥈x3 🥉x4 🚩x1 | ||
| 2. LAPX LAPJV : 599.8239 ms | ✅ | 🥇x3 🥈x2 🥉x2 🚩x2 | ||
| 3. LAPX LAPJVX : 606.4511 ms | ✅ | 🥇x4 🥈x4 🥉x1 | ||
| 4. BASELINE SciPy : 1032.5424 ms | ⭐ | 🥇x1 🥉x1 🚩x6 🥴x1 | ||
| 5. LAPX LAPJVC : 2640.2397 ms | ✅ | 🥉x1 🏳️x5 🥴x3 | ||
| 6. LAPX LAPJV-IFT : 2970.9133 ms | ✅ | 🏳️x4 🥴x5 | ||
| 🎉 ------------------------------------------------------------------------- 🎉 | ||
| ``` | ||
| 👁️ See more results on various platforms and architectures [here](https://github.com/rathaROG/lapx/actions/runs/18830580672). | ||
| 👁️ See more results on various platforms and architectures [here](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml). | ||
| </details> |
+57
-18
@@ -59,25 +59,64 @@ # Copyright (c) 2025 Ratha SIV | MIT License | ||
| __version__ = '0.8.1' | ||
| from typing import TYPE_CHECKING | ||
| import importlib | ||
| # Single-matrix solvers | ||
| from .lapmod import lapmod | ||
| from ._lapjv import ( | ||
| lapjv, | ||
| LARGE_ as LARGE, | ||
| FP_1_ as FP_1, | ||
| FP_2_ as FP_2, | ||
| FP_DYNAMIC_ as FP_DYNAMIC | ||
| ) | ||
| from ._lapjvx import lapjvx, lapjvxa | ||
| from ._lapjvc import lapjvc | ||
| from .lapjvs import lapjvs, lapjvsa | ||
| if TYPE_CHECKING: | ||
| # Single-matrix solvers | ||
| from ._lapmod_wp import lapmod | ||
| from ._lapjv_wp import lapjv | ||
| from ._lapjvx_wp import lapjvx, lapjvxa | ||
| from ._lapjvc_wp import lapjvc | ||
| from ._lapjvs_wp import lapjvs, lapjvsa | ||
| # Batch solvers | ||
| from ._lapjvx_batch_wp import lapjvx_batch, lapjvxa_batch | ||
| from ._lapjvs_batch_wp import lapjvs_batch, lapjvsa_batch | ||
| # Constants | ||
| from ._lapjv import ( # type: ignore | ||
| LARGE_ as LARGE, | ||
| FP_1_ as FP_1, | ||
| FP_2_ as FP_2, | ||
| FP_DYNAMIC_ as FP_DYNAMIC, | ||
| ) | ||
| # Batch solvers | ||
| from .lapjvx_batch import lapjvx_batch, lapjvxa_batch | ||
| from .lapjvs_batch import lapjvs_batch, lapjvsa_batch | ||
| _exports = { | ||
| # Single-matrix solvers | ||
| 'lapmod': ("lap._lapmod_wp", "lapmod"), | ||
| 'lapjv': ("lap._lapjv_wp", "lapjv"), | ||
| 'lapjvx': ("lap._lapjvx_wp", "lapjvx"), | ||
| 'lapjvxa': ("lap._lapjvx_wp", "lapjvxa"), | ||
| 'lapjvc': ("lap._lapjvc_wp", "lapjvc"), | ||
| 'lapjvs': ("lap._lapjvs_wp", "lapjvs"), | ||
| 'lapjvsa': ("lap._lapjvs_wp", "lapjvsa"), | ||
| # Batch solvers | ||
| 'lapjvx_batch': ("lap._lapjvx_batch_wp", "lapjvx_batch"), | ||
| 'lapjvxa_batch': ("lap._lapjvx_batch_wp", "lapjvxa_batch"), | ||
| 'lapjvs_batch': ("lap._lapjvs_batch_wp", "lapjvs_batch"), | ||
| 'lapjvsa_batch': ("lap._lapjvs_batch_wp", "lapjvsa_batch"), | ||
| # Constants | ||
| 'LARGE': ("lap._lapjv", "LARGE_"), | ||
| 'FP_1': ("lap._lapjv", "FP_1_"), | ||
| 'FP_2': ("lap._lapjv", "FP_2_"), | ||
| 'FP_DYNAMIC': ("lap._lapjv", "FP_DYNAMIC_"), | ||
| } | ||
| def __getattr__(name): | ||
| if name in _exports: | ||
| mod_path, attr = _exports[name] | ||
| mod = importlib.import_module(mod_path) | ||
| obj = getattr(mod, attr) | ||
| globals()[name] = obj | ||
| return obj | ||
| raise AttributeError(f"LAPX could not find attribute '{name}'.") | ||
| __version__ = '0.9.0' | ||
| __author__ = 'Ratha SIV' | ||
| __description__ = 'Linear assignment problem solvers, including single and batch solvers.' | ||
| __homepage__ = 'https://github.com/rathaROG/lapx' | ||
| __all__ = [ | ||
| 'lapjv', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs', 'lapjvsa', | ||
| # Single-matrix solvers | ||
| 'lapmod', 'lapjv', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs', 'lapjvsa', | ||
| # Batch solvers | ||
| 'lapjvx_batch', 'lapjvxa_batch', 'lapjvs_batch', 'lapjvsa_batch', | ||
| 'lapmod', 'FP_1', 'FP_2', 'FP_DYNAMIC', 'LARGE' | ||
| # Constants | ||
| 'FP_1', 'FP_2', 'FP_DYNAMIC', 'LARGE', | ||
| ] |
+41
-48
| Metadata-Version: 2.4 | ||
| Name: lapx | ||
| Version: 0.8.1 | ||
| Version: 0.9.0 | ||
| Summary: Linear assignment problem solvers, including single and batch solvers. | ||
@@ -52,9 +52,10 @@ Home-page: https://github.com/rathaROG/lapx | ||
| <details><summary>🆕 What's new</summary><br> | ||
| <details><summary>🆕 What's new</summary><hr> | ||
| - 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. | ||
| - 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. | ||
| - 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. | ||
| - 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. | ||
| - Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). | ||
| <sup>- 2025/10/31: [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) delivered a major stability and performance upgrade across most solvers. 🚀 </sup><br> | ||
| <sup>- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. </sup><br> | ||
| <sup>- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. </sup><br> | ||
| <sup>- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. </sup><br> | ||
| <sup>- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. </sup><br> | ||
| <sup>- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). </sup><br> | ||
@@ -65,17 +66,23 @@ </details> | ||
| <div align="center"> | ||
| [](https://github.com/rathaROG/lapx/releases) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml) | ||
| [](https://pypi.org/project/lapx/#files) | ||
| [](https://pypi.org/project/lapx/) | ||
| # Linear Assignment Problem Solvers | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml) | ||
| [`lapx`](https://github.com/rathaROG/lapx) supports all input cost types — Single ✓ Batch ✓ Square ✓ Rectangular ✓ . | ||
| # Linear Assignment Problem Solvers · 𝕏 | ||
| `lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| **Single ✓ Batch ✓ Square ✓ Rectangular ✓** | ||
| </div> | ||
| [`lapx`](https://github.com/rathaROG/lapx) was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| <details><summary>Click to read more ...</summary><br> | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³. | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation ³ provided by A. Volgenant. | ||
@@ -93,3 +100,3 @@ <sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br> | ||
| [](https://pypi.org/project/lapx/) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://pepy.tech/project/lapx) | ||
@@ -102,15 +109,4 @@ [](https://pepy.tech/project/lapx) | ||
| <details><summary>🛞 Pre-built wheel support</summary> | ||
| *The pre-built wheels cover most platforms and architectures, see [details](https://pypi.org/project/lapx/#files).* | ||
| | Python | **Windows** ✅ | **Linux** ✅ | **macOS** ✅ | | ||
| |:---:|:---:|:---:|:---:| | ||
| | 3.7 | AMD64 | x86_64/aarch64 | x86_64 | | ||
| | 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 | | ||
| | 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 | | ||
| <sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br> | ||
| <sup>² ⚠️ Windows ARM64 is experimental. </sup><br> | ||
| </details> | ||
| <details><summary>🛠️ Other installation options</summary> | ||
@@ -139,2 +135,4 @@ | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml) | ||
| ### 🅰️ Single-matrix Solvers 📄 | ||
@@ -196,3 +194,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -226,3 +225,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -243,3 +243,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -268,3 +269,3 @@ | ||
| For see the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details. | ||
| See the [`lap/_lapmod_wp.py`](https://github.com/rathaROG/lapx/blob/main/lap/_lapmod_wp.py) for details. | ||
@@ -302,7 +303,4 @@ ```python | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -323,4 +321,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -342,7 +339,4 @@ | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -365,4 +359,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -378,3 +371,3 @@ | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) |
@@ -9,6 +9,9 @@ LICENSE | ||
| lap/__init__.py | ||
| lap/lapjvs.py | ||
| lap/lapjvs_batch.py | ||
| lap/lapjvx_batch.py | ||
| lap/lapmod.py | ||
| lap/_lapjv_wp.py | ||
| lap/_lapjvc_wp.py | ||
| lap/_lapjvs_batch_wp.py | ||
| lap/_lapjvs_wp.py | ||
| lap/_lapjvx_batch_wp.py | ||
| lap/_lapjvx_wp.py | ||
| lap/_lapmod_wp.py | ||
| lapx.egg-info/PKG-INFO | ||
@@ -15,0 +18,0 @@ lapx.egg-info/SOURCES.txt |
+1
-0
@@ -8,2 +8,3 @@ include *.md *.in LICENSE NOTICE | ||
| prune .vscode | ||
| prune benchmarks | ||
| prune tests |
+41
-48
| Metadata-Version: 2.4 | ||
| Name: lapx | ||
| Version: 0.8.1 | ||
| Version: 0.9.0 | ||
| Summary: Linear assignment problem solvers, including single and batch solvers. | ||
@@ -52,9 +52,10 @@ Home-page: https://github.com/rathaROG/lapx | ||
| <details><summary>🆕 What's new</summary><br> | ||
| <details><summary>🆕 What's new</summary><hr> | ||
| - 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. | ||
| - 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. | ||
| - 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. | ||
| - 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. | ||
| - Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). | ||
| <sup>- 2025/10/31: [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) delivered a major stability and performance upgrade across most solvers. 🚀 </sup><br> | ||
| <sup>- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. </sup><br> | ||
| <sup>- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. </sup><br> | ||
| <sup>- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. </sup><br> | ||
| <sup>- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. </sup><br> | ||
| <sup>- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). </sup><br> | ||
@@ -65,17 +66,23 @@ </details> | ||
| <div align="center"> | ||
| [](https://github.com/rathaROG/lapx/releases) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml) | ||
| [](https://pypi.org/project/lapx/#files) | ||
| [](https://pypi.org/project/lapx/) | ||
| # Linear Assignment Problem Solvers | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml) | ||
| [`lapx`](https://github.com/rathaROG/lapx) supports all input cost types — Single ✓ Batch ✓ Square ✓ Rectangular ✓ . | ||
| # Linear Assignment Problem Solvers · 𝕏 | ||
| `lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| **Single ✓ Batch ✓ Square ✓ Rectangular ✓** | ||
| </div> | ||
| [`lapx`](https://github.com/rathaROG/lapx) was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| <details><summary>Click to read more ...</summary><br> | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³. | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation ³ provided by A. Volgenant. | ||
@@ -93,3 +100,3 @@ <sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br> | ||
| [](https://pypi.org/project/lapx/) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://pepy.tech/project/lapx) | ||
@@ -102,15 +109,4 @@ [](https://pepy.tech/project/lapx) | ||
| <details><summary>🛞 Pre-built wheel support</summary> | ||
| *The pre-built wheels cover most platforms and architectures, see [details](https://pypi.org/project/lapx/#files).* | ||
| | Python | **Windows** ✅ | **Linux** ✅ | **macOS** ✅ | | ||
| |:---:|:---:|:---:|:---:| | ||
| | 3.7 | AMD64 | x86_64/aarch64 | x86_64 | | ||
| | 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 | | ||
| | 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 | | ||
| <sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br> | ||
| <sup>² ⚠️ Windows ARM64 is experimental. </sup><br> | ||
| </details> | ||
| <details><summary>🛠️ Other installation options</summary> | ||
@@ -139,2 +135,4 @@ | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml) | ||
| ### 🅰️ Single-matrix Solvers 📄 | ||
@@ -196,3 +194,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -226,3 +225,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -243,3 +243,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -268,3 +269,3 @@ | ||
| For see the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details. | ||
| See the [`lap/_lapmod_wp.py`](https://github.com/rathaROG/lapx/blob/main/lap/_lapmod_wp.py) for details. | ||
@@ -302,7 +303,4 @@ ```python | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -323,4 +321,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -342,7 +339,4 @@ | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -365,4 +359,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -378,3 +371,3 @@ | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) |
+40
-47
@@ -1,8 +0,9 @@ | ||
| <details><summary>🆕 What's new</summary><br> | ||
| <details><summary>🆕 What's new</summary><hr> | ||
| - 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. | ||
| - 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. | ||
| - 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. | ||
| - 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. | ||
| - Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). | ||
| <sup>- 2025/10/31: [v0.9.0](https://github.com/rathaROG/lapx/releases/tag/v0.9.0) delivered a major stability and performance upgrade across most solvers. 🚀 </sup><br> | ||
| <sup>- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) added **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`**, **`lapjvsa_batch()`** and **`lapjvsa()`**. </sup><br> | ||
| <sup>- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) added **`lapjvs()`**. </sup><br> | ||
| <sup>- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) added **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**. </sup><br> | ||
| <sup>- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support. </sup><br> | ||
| <sup>- Looking for more? See [GitHub releases](https://github.com/rathaROG/lapx/releases). </sup><br> | ||
@@ -13,17 +14,23 @@ </details> | ||
| <div align="center"> | ||
| [](https://github.com/rathaROG/lapx/releases) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/test_simple.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/prepublish.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/publish.yaml) | ||
| [](https://pypi.org/project/lapx/#files) | ||
| [](https://pypi.org/project/lapx/) | ||
| # Linear Assignment Problem Solvers | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_single.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_batch.yaml) | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/benchmark_tracking.yaml) | ||
| [`lapx`](https://github.com/rathaROG/lapx) supports all input cost types — Single ✓ Batch ✓ Square ✓ Rectangular ✓. | ||
| # Linear Assignment Problem Solvers · 𝕏 | ||
| `lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| **Single ✓ Batch ✓ Square ✓ Rectangular ✓** | ||
| </div> | ||
| [`lapx`](https://github.com/rathaROG/lapx) was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more -> See the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions. | ||
| <details><summary>Click to read more ...</summary><br> | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³. | ||
| All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch based solely on the papers ¹˒² and the public domain Pascal implementation ³ provided by A. Volgenant. | ||
@@ -41,3 +48,3 @@ <sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br> | ||
| [](https://pypi.org/project/lapx/) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://badge.fury.io/py/lapx) | ||
| [](https://pepy.tech/project/lapx) | ||
@@ -50,15 +57,4 @@ [](https://pepy.tech/project/lapx) | ||
| <details><summary>🛞 Pre-built wheel support</summary> | ||
| *The pre-built wheels cover most platforms and architectures, see [details](https://pypi.org/project/lapx/#files).* | ||
| | Python | **Windows** ✅ | **Linux** ✅ | **macOS** ✅ | | ||
| |:---:|:---:|:---:|:---:| | ||
| | 3.7 | AMD64 | x86_64/aarch64 | x86_64 | | ||
| | 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 | | ||
| | 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 | | ||
| <sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br> | ||
| <sup>² ⚠️ Windows ARM64 is experimental. </sup><br> | ||
| </details> | ||
| <details><summary>🛠️ Other installation options</summary> | ||
@@ -87,2 +83,4 @@ | ||
| [](https://github.com/rathaROG/lapx/actions/workflows/tests.yaml) | ||
| ### 🅰️ Single-matrix Solvers 📄 | ||
@@ -144,3 +142,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -174,3 +173,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -191,3 +191,4 @@ | ||
| total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True) | ||
| assignments = np.column_stack((row_indices, col_indices)) # or np.array(list(zip(row_indices, col_indices))) | ||
| assignments = np.column_stack((row_indices, col_indices)) | ||
| # assignments = np.array(list(zip(row_indices, col_indices))) # slower | ||
| ``` | ||
@@ -216,3 +217,3 @@ | ||
| See the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details. | ||
| See the [`lap/_lapmod_wp.py`](https://github.com/rathaROG/lapx/blob/main/lap/_lapmod_wp.py) for details. | ||
@@ -250,7 +251,4 @@ ```python | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -271,4 +269,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -290,7 +287,4 @@ | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignment batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| # access the assignments @ batch b = 7 | ||
| assignments_7 = np.column_stack((rows[7], cols[7])) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
@@ -313,4 +307,3 @@ ``` | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| print(f"assignments[7].shape = {assignments[7].shape}") # assignments @ batch b = 7 | ||
| ``` | ||
@@ -326,3 +319,3 @@ | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/NOTICE) | ||
| [](https://github.com/rathaROG/lapx/blob/main/LICENSE) |
+154
-61
@@ -46,72 +46,130 @@ # Tomas Kazmar, 2012-2017, BSD 2-clause license, see LICENSE. | ||
| double cost_limit=np.inf, char return_cost=True): | ||
| """Solve linear assignment problem using Jonker-Volgenant algorithm. | ||
| """ | ||
| Solve the Linear Assignment Problem using the Jonker-Volgenant (JV) algorithm. | ||
| Orientation is normalized internally for performance: the native kernel | ||
| always sees rows <= cols by transposing when N_rows > N_cols, and results | ||
| are mapped back to the original (row, col) orientation before returning. | ||
| Parameters | ||
| ---------- | ||
| cost: (N,M) ndarray | ||
| Cost matrix. Entry `cost[i, j]` is the cost of assigning row `i` to | ||
| column `j`. | ||
| extend_cost: bool, optional | ||
| Whether or not extend a non-square matrix. Default: False. | ||
| cost_limit: double, optional | ||
| An upper limit for a cost of a single assignment. Default: `np.inf`. | ||
| return_cost: bool, optional | ||
| Whether or not to return the assignment cost. | ||
| cost : (N, M) ndarray | ||
| Cost matrix. Entry cost[i, j] is the cost of assigning row i to column j. | ||
| Any float dtype is accepted; a contiguous float64 working buffer is used only if needed. | ||
| extend_cost : bool, optional (default: False) | ||
| Whether to permit non-square inputs via zero-padding to a square matrix. | ||
| See the unified augmentation policy below. | ||
| cost_limit : float, optional (default: np.inf) | ||
| If finite, augment to an (N+M) x (N+M) matrix with sentinel costs | ||
| cost_limit/2 and a 0 block in the bottom-right. This models per-edge | ||
| reject costs and allows rectangular inputs even when extend_cost=False. | ||
| return_cost : bool, optional (default: True) | ||
| Whether to return the total assignment cost as the first return value. | ||
| Returns | ||
| ------- | ||
| opt: double | ||
| Assignment cost. Not returned if `return_cost is False`. | ||
| x: (N,) ndarray | ||
| Assignment. `x[i]` specifies the column to which row `i` is assigned. | ||
| y: (N,) ndarray | ||
| Assignment. `y[j]` specifies the row to which column `j` is assigned. | ||
| opt : float | ||
| Total assignment cost (only if return_cost=True). The total is computed | ||
| from the ORIGINAL input array shape (N, M), not the padded/augmented one. | ||
| x : (N,) ndarray of int32 | ||
| x[i] = assigned column index for row i, or -1 if unassigned. | ||
| y : (M,) ndarray of int32 | ||
| y[j] = assigned row index for column j, or -1 if unassigned. | ||
| Unified augmentation policy | ||
| --------------------------- | ||
| - If cost_limit < inf: always augment to (N+M) to model per-edge rejects. | ||
| Rectangular inputs are allowed regardless of extend_cost. | ||
| - Else if (N != M) or extend_cost=True: zero-pad to a square of size max(N, M). | ||
| Rectangular inputs are allowed when extend_cost=True. | ||
| - Else (square, un-augmented): run on the given square matrix. | ||
| Notes | ||
| ----- | ||
| For non-square matrices (with `extend_cost is True`) or `cost_limit` set | ||
| low enough, there will be unmatched rows, columns in the solution `x`, `y`. | ||
| All such entries are set to -1. | ||
| - Single contiguous working buffer: we reuse the input when it's already | ||
| float64 C-contiguous and no transpose is needed; otherwise we materialize | ||
| exactly one contiguous float64 working array (or its transpose). | ||
| - For zero-sized dimensions, the solver returns 0.0 (if requested) and | ||
| all -1 mappings with appropriate lengths. | ||
| """ | ||
| if cost.ndim != 2: | ||
| raise ValueError('2-dimensional array expected') | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = \ | ||
| np.ascontiguousarray(cost, dtype=np.double) | ||
| # Original input for final total computation (no copy unless necessary for transpose below) | ||
| A = np.asarray(cost) | ||
| cdef Py_ssize_t n_rows0 = A.shape[0] | ||
| cdef Py_ssize_t n_cols0 = A.shape[1] | ||
| # Fast exits for empty dimensions | ||
| if n_rows0 == 0 or n_cols0 == 0: | ||
| if return_cost: | ||
| return 0.0, np.full((n_rows0,), -1, dtype=np.int32), np.full((n_cols0,), -1, dtype=np.int32) | ||
| else: | ||
| return np.full((n_rows0,), -1, dtype=np.int32), np.full((n_cols0,), -1, dtype=np.int32) | ||
| # Normalize orientation: kernel sees rows <= cols | ||
| cdef bint transposed = False | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] B | ||
| if n_rows0 > n_cols0: | ||
| B = np.ascontiguousarray(A.T, dtype=np.double) # single working buffer (transposed) | ||
| transposed = True | ||
| else: | ||
| if A.dtype == np.float64 and A.flags['C_CONTIGUOUS']: | ||
| # reuse input buffer directly | ||
| B = A | ||
| else: | ||
| B = np.ascontiguousarray(A, dtype=np.double) # single working buffer | ||
| cdef Py_ssize_t R = B.shape[0] # working rows (<= cols) | ||
| cdef Py_ssize_t C = B.shape[1] # working cols | ||
| # Permit rectangular when cost_limit < inf (augment) or extend_cost=True (zero-pad); otherwise require square | ||
| if R != C and (not extend_cost) and cost_limit == np.inf: | ||
| raise ValueError( | ||
| 'Square cost array expected. If cost is intentionally ' | ||
| 'non-square, pass extend_cost=True or set a finite cost_limit.' | ||
| ) | ||
| cdef uint_t N | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = B | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c_extended | ||
| cdef uint_t n_rows = cost_c.shape[0] | ||
| cdef uint_t n_cols = cost_c.shape[1] | ||
| cdef uint_t n = 0 | ||
| if n_rows == n_cols: | ||
| n = n_rows | ||
| else: | ||
| if not extend_cost: | ||
| raise ValueError( | ||
| 'Square cost array expected. If cost is intentionally ' | ||
| 'non-square, pass extend_cost=True.') | ||
| if cost_limit < np.inf: | ||
| n = n_rows + n_cols | ||
| cost_c_extended = np.empty((n, n), dtype=np.double) | ||
| cost_c_extended[:] = cost_limit / 2. | ||
| cost_c_extended[n_rows:, n_cols:] = 0 | ||
| cost_c_extended[:n_rows, :n_cols] = cost_c | ||
| # Augment to (R+C)x(R+C) with sentinel edges | ||
| N = <uint_t>(R + C) | ||
| cost_c_extended = np.empty((R + C, R + C), dtype=np.double) | ||
| cost_c_extended[:] = cost_limit / 2.0 | ||
| cost_c_extended[R:, C:] = 0.0 | ||
| cost_c_extended[:R, :C] = cost_c | ||
| cost_c = cost_c_extended | ||
| elif extend_cost: | ||
| n = max(n_rows, n_cols) | ||
| cost_c_extended = np.zeros((n, n), dtype=np.double) | ||
| cost_c_extended[:n_rows, :n_cols] = cost_c | ||
| cost_c = cost_c_extended | ||
| elif R != C or extend_cost: | ||
| # Zero-pad to square max(R, C); if already square and extend_cost=True, keep as-is | ||
| N = <uint_t>max(R, C) | ||
| if R != C: | ||
| cost_c_extended = np.zeros((N, N), dtype=np.double) | ||
| cost_c_extended[:R, :C] = cost_c | ||
| cost_c = cost_c_extended | ||
| else: | ||
| N = <uint_t>R | ||
| else: | ||
| # Square, un-augmented | ||
| N = <uint_t>R | ||
| cdef double **cost_ptr | ||
| cost_ptr = <double **> malloc(n * sizeof(double *)) | ||
| # Build row-pointer view for kernel | ||
| cdef double **cost_ptr = <double **> malloc(N * sizeof(double *)) | ||
| if cost_ptr == NULL: | ||
| raise MemoryError('Out of memory.') | ||
| cdef int i | ||
| for i in range(n): | ||
| for i in range(N): | ||
| cost_ptr[i] = &cost_c[i, 0] | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = \ | ||
| np.empty((n,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = \ | ||
| np.empty((n,), dtype=np.int32) | ||
| # Outputs for kernel space (size N) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((N,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((N,), dtype=np.int32) | ||
| cdef int ret = lapjv_internal(n, cost_ptr, &x_c[0], &y_c[0]) | ||
| cdef int ret | ||
| with nogil: | ||
| ret = lapjv_internal(N, cost_ptr, &x_c[0], &y_c[0]) | ||
| free(cost_ptr) | ||
| if ret != 0: | ||
@@ -122,17 +180,52 @@ if ret == -1: | ||
| cdef double opt = np.nan | ||
| if cost_limit < np.inf or extend_cost: | ||
| x_c[x_c >= n_cols] = -1 | ||
| y_c[y_c >= n_rows] = -1 | ||
| x_c = x_c[:n_rows] | ||
| y_c = y_c[:n_cols] | ||
| if return_cost: | ||
| opt = cost_c[np.nonzero(x_c != -1)[0], x_c[x_c != -1]].sum() | ||
| elif return_cost: | ||
| opt = cost_c[np.arange(n_rows), x_c].sum() | ||
| # Trim to working rectangle (B space) and clean artificial matches | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_trim | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_trim | ||
| if cost_limit < np.inf or (R != C or extend_cost): | ||
| x_c[x_c >= C] = -1 | ||
| y_c[y_c >= R] = -1 | ||
| x_trim = x_c[:R] | ||
| y_trim = y_c[:C] | ||
| else: | ||
| x_trim = x_c[:R] | ||
| y_trim = y_c[:C] | ||
| # Map to ORIGINAL orientation (A space) as vectors x_out (N_rows0), y_out (N_cols0) | ||
| x_out = np.full((n_rows0,), -1, dtype=np.int32) | ||
| y_out = np.full((n_cols0,), -1, dtype=np.int32) | ||
| if not transposed: | ||
| mask = (x_trim >= 0) | ||
| if np.any(mask): | ||
| rows = np.nonzero(mask)[0] | ||
| cols = x_trim[mask] | ||
| x_out[rows] = cols | ||
| y_out[cols] = rows | ||
| else: | ||
| # B rows are A columns; B cols are A rows | ||
| mask = (x_trim >= 0) | ||
| if np.any(mask): | ||
| rows_b = np.nonzero(mask)[0] # indices in B rows -> A columns | ||
| cols_b = x_trim[mask] # indices in B cols -> A rows | ||
| row_A = cols_b | ||
| col_A = rows_b | ||
| x_out[row_A] = col_A | ||
| y_out[col_A] = row_A | ||
| # Total from ORIGINAL A | ||
| cdef double opt = 0.0 | ||
| if return_cost: | ||
| return opt, x_c, y_c | ||
| mcost = (x_out >= 0) | ||
| if np.any(mcost): | ||
| rr = np.nonzero(mcost)[0] | ||
| cc = x_out[mcost] | ||
| opt = float(A[rr, cc].sum()) | ||
| else: | ||
| opt = 0.0 | ||
| if return_cost: | ||
| return opt, x_out, y_out | ||
| else: | ||
| return x_c, y_c | ||
| return x_out, y_out | ||
@@ -139,0 +232,0 @@ |
+105
-68
@@ -40,63 +40,92 @@ # _lapjvx.pyx | Wrote on 2025/10/16 by rathaROG | ||
| Parameters | ||
| ---------- | ||
| cost: (N, M) ndarray | ||
| Cost matrix. Entry cost[i, j] is the cost of assigning row i to column j. | ||
| extend_cost: bool, optional | ||
| Whether or not to extend a non-square matrix. Default: False. | ||
| cost_limit: double, optional | ||
| An upper limit for a cost of a single assignment. Default: np.inf. | ||
| return_cost: bool, optional | ||
| Whether or not to return the assignment cost. | ||
| Orientation is normalized internally (kernel sees rows <= cols) and results | ||
| are mapped back to the original orientation. | ||
| Unified augmentation policy | ||
| --------------------------- | ||
| - If cost_limit < inf: augment to (N+M) with cost_limit/2 sentinels (rectangular allowed). | ||
| - Elif (N != M) or extend_cost: zero-pad to square max(N, M) (rectangular allowed when extend_cost=True). | ||
| - Else (square, un-augmented): run on the given square. | ||
| Returns | ||
| ------- | ||
| opt: double | ||
| Assignment cost. Not returned if return_cost is False. | ||
| row_indices: (K,) ndarray | ||
| Indices of assigned rows. | ||
| col_indices: (K,) ndarray | ||
| Indices of assigned columns. | ||
| opt : float | ||
| Total cost (if return_cost=True), computed on the ORIGINAL input (not padded). | ||
| row_indices : (K,) ndarray (np.where -> int64) | ||
| col_indices : (K,) ndarray (sliced from x_c -> int32) | ||
| """ | ||
| if cost.ndim != 2: | ||
| raise ValueError('2-dimensional array expected') | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = \ | ||
| np.ascontiguousarray(cost, dtype=np.double) | ||
| # Original input for final total (no copy unless needed for transpose) | ||
| A = np.asarray(cost) | ||
| cdef Py_ssize_t n_rows0 = A.shape[0] | ||
| cdef Py_ssize_t n_cols0 = A.shape[1] | ||
| # Fast exits for empty dims | ||
| if n_rows0 == 0 or n_cols0 == 0: | ||
| if return_cost: | ||
| return 0.0, np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64) | ||
| else: | ||
| return np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64) | ||
| # Normalize orientation: kernel sees rows <= cols | ||
| cdef bint transposed = False | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] B | ||
| if n_rows0 > n_cols0: | ||
| B = np.ascontiguousarray(A.T, dtype=np.double) # single working buffer (transposed) | ||
| transposed = True | ||
| else: | ||
| if A.dtype == np.float64 and A.flags['C_CONTIGUOUS']: | ||
| B = A # reuse input buffer | ||
| else: | ||
| B = np.ascontiguousarray(A, dtype=np.double) # single working buffer | ||
| cdef Py_ssize_t R = B.shape[0] | ||
| cdef Py_ssize_t C = B.shape[1] | ||
| # Gate: rectangular error only if extend_cost=False and cost_limit==inf | ||
| if R != C and (not extend_cost) and cost_limit == np.inf: | ||
| raise ValueError( | ||
| 'Square cost array expected. If cost is intentionally ' | ||
| 'non-square, pass extend_cost=True.' | ||
| ) | ||
| cdef uint_t N | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c = B | ||
| cdef cnp.ndarray[cnp.double_t, ndim=2, mode='c'] cost_c_extended | ||
| cdef uint_t n_rows = cost_c.shape[0] | ||
| cdef uint_t n_cols = cost_c.shape[1] | ||
| cdef uint_t n = 0 | ||
| if n_rows == n_cols: | ||
| n = n_rows | ||
| else: | ||
| if not extend_cost: | ||
| raise ValueError( | ||
| 'Square cost array expected. If cost is intentionally ' | ||
| 'non-square, pass extend_cost=True.') | ||
| if cost_limit < np.inf: | ||
| n = n_rows + n_cols | ||
| cost_c_extended = np.empty((n, n), dtype=np.double) | ||
| cost_c_extended[:] = cost_limit / 2. | ||
| cost_c_extended[n_rows:, n_cols:] = 0 | ||
| cost_c_extended[:n_rows, :n_cols] = cost_c | ||
| N = <uint_t>(R + C) | ||
| cost_c_extended = np.empty((R + C, R + C), dtype=np.double) | ||
| cost_c_extended[:] = cost_limit / 2.0 | ||
| cost_c_extended[R:, C:] = 0.0 | ||
| cost_c_extended[:R, :C] = cost_c | ||
| cost_c = cost_c_extended | ||
| elif extend_cost: | ||
| n = max(n_rows, n_cols) | ||
| cost_c_extended = np.zeros((n, n), dtype=np.double) | ||
| cost_c_extended[:n_rows, :n_cols] = cost_c | ||
| cost_c = cost_c_extended | ||
| elif R != C or extend_cost: | ||
| N = <uint_t>max(R, C) | ||
| if R != C: | ||
| cost_c_extended = np.zeros((N, N), dtype=np.double) | ||
| cost_c_extended[:R, :C] = cost_c | ||
| cost_c = cost_c_extended | ||
| else: | ||
| N = <uint_t>R | ||
| else: | ||
| N = <uint_t>R | ||
| cdef double **cost_ptr | ||
| cost_ptr = <double **> malloc(n * sizeof(double *)) | ||
| # Build row-pointer view | ||
| cdef double **cost_ptr = <double **> malloc(N * sizeof(double *)) | ||
| if cost_ptr == NULL: | ||
| raise MemoryError('Out of memory when allocating cost_ptr') | ||
| cdef int i | ||
| for i in range(n): | ||
| for i in range(N): | ||
| cost_ptr[i] = &cost_c[i, 0] | ||
| # Allocate x/y, build cost_ptr as before | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((n,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((n,), dtype=np.int32) | ||
| # Allocate x/y | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((N,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((N,), dtype=np.int32) | ||
| cdef int ret | ||
| with nogil: | ||
| ret = lapjv_internal(<uint_t> n, cost_ptr, &x_c[0], &y_c[0]) | ||
| ret = lapjv_internal(<uint_t> N, cost_ptr, &x_c[0], &y_c[0]) | ||
@@ -109,19 +138,34 @@ free(cost_ptr) | ||
| cdef double opt = np.nan | ||
| if cost_limit < np.inf or extend_cost: | ||
| x_c[x_c >= n_cols] = -1 | ||
| y_c[y_c >= n_rows] = -1 | ||
| x_c = x_c[:n_rows] | ||
| y_c = y_c[:n_cols] | ||
| if return_cost: | ||
| opt = cost_c[np.nonzero(x_c != -1)[0], x_c[x_c != -1]].sum() | ||
| elif return_cost: | ||
| opt = cost_c[np.arange(n_rows), x_c].sum() | ||
| # Trim to working rectangle (B-space) and clean artificial matches | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_trim | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_trim | ||
| # Construct row_indices, col_indices - only for assigned rows | ||
| assigned_mask = x_c >= 0 | ||
| row_indices = np.where(assigned_mask)[0] | ||
| col_indices = x_c[assigned_mask] | ||
| if cost_limit < np.inf or (R != C or extend_cost): | ||
| x_c[x_c >= C] = -1 | ||
| y_c[y_c >= R] = -1 | ||
| x_trim = x_c[:R] | ||
| y_trim = y_c[:C] | ||
| else: | ||
| x_trim = x_c[:R] | ||
| y_trim = y_c[:C] | ||
| # Build rows/cols in ORIGINAL orientation (A-space) | ||
| rows_b = np.nonzero(x_trim >= 0)[0].astype(np.int64, copy=False) | ||
| cols_b = x_trim[x_trim >= 0] # keep int32 dtype | ||
| if transposed: | ||
| row_indices = cols_b | ||
| col_indices = rows_b | ||
| else: | ||
| row_indices = rows_b | ||
| col_indices = cols_b | ||
| cdef double opt = 0.0 | ||
| if return_cost: | ||
| if row_indices.size: | ||
| opt = float(A[row_indices, col_indices].sum()) | ||
| else: | ||
| opt = 0.0 | ||
| if return_cost: | ||
| return opt, row_indices, col_indices | ||
@@ -140,12 +184,5 @@ else: | ||
| """ | ||
| Like lapjvx, but returns assignment pairs as a (K,2) np.ndarray of (row, col). | ||
| Returns | ||
| ------- | ||
| opt : double | ||
| Assignment cost. Not returned if return_cost is False. | ||
| assignments : (K, 2) ndarray | ||
| Each row is (row_index, col_index). | ||
| Like lapjvx, but returns assignment pairs as a (K,2) ndarray of (row, col). | ||
| Uses int32 pairs to match legacy behavior. | ||
| """ | ||
| # We call lapjvx to get the indices | ||
| if return_cost: | ||
@@ -152,0 +189,0 @@ opt, row_indices, col_indices = lapjvx(cost, extend_cost=extend_cost, |
@@ -7,2 +7,3 @@ #include <pybind11/pybind11.h> | ||
| #include <limits> | ||
| #include <type_traits> | ||
@@ -12,7 +13,11 @@ #include "dense.hpp" | ||
| /** | ||
| Updated on 2025/10/16 by rathaROG | ||
| Updated by rathaROG: | ||
| The return value is changed to include total_cost as the first element of the tuple. | ||
| This is to avoid recalculating the total cost in Python, which can be inefficient for | ||
| large matrices. | ||
| 2025/10/30: Improve speed almost 2x for rectangular matrices by using ZERO padding | ||
| for dummy rows/columns instead of LARGE_COST padding. This avoids filling the entire | ||
| padded area with LARGE_COST and speeds up rectangular cases significantly. | ||
| 2025/10/16: The return value is changed to include total_cost as the first element of | ||
| the tuple. This is to avoid recalculating the total cost in Python, which can be | ||
| inefficient for large matrices. | ||
| */ | ||
@@ -42,10 +47,10 @@ | ||
| bool any_finite = false; | ||
| T max_abs_cost = 0; | ||
| for(int i = 0; i < nrows*ncols; ++i) { | ||
| if (std::isfinite((double)data[i])) { | ||
| double max_abs_cost_d = 0.0; | ||
| for (int i = 0; i < nrows * ncols; ++i) { | ||
| // We cast to double for the finiteness check. For integer T this is always finite. | ||
| double dv = static_cast<double>(data[i]); | ||
| if (std::isfinite(dv)) { | ||
| any_finite = true; | ||
| // Careful: Note that std::abs() is not a template. | ||
| // https://en.cppreference.com/w/cpp/numeric/math/abs | ||
| // https://en.cppreference.com/w/cpp/numeric/math/fabs | ||
| max_abs_cost = std::max<T>(max_abs_cost, std::abs(data[i])); | ||
| // Use fabs on double to avoid template pitfalls and integer overflow on abs(INT_MIN). | ||
| max_abs_cost_d = std::max(max_abs_cost_d, std::fabs(dv)); | ||
| } | ||
@@ -63,15 +68,33 @@ } | ||
| const int n = std::max<int>(nrows, ncols); | ||
| const T LARGE_COST = 2 * r * max_abs_cost + 1; | ||
| std::vector<std::vector<T>> costs(n, std::vector<T>(n, LARGE_COST)); | ||
| for (int i = 0; i < nrows; i++) | ||
| { | ||
| T *cptr = data + i*ncols; | ||
| for (int j = 0; j < ncols; j++) | ||
| { | ||
| // Compute a LARGE_COST sentinel for forbidden entries within the original rectangle. | ||
| // Use double to compute and clamp if T is integral to avoid overflow. | ||
| double large_cost_d = 2.0 * static_cast<double>(r) * max_abs_cost_d + 1.0; | ||
| T LARGE_COST; | ||
| if constexpr (std::is_floating_point<T>::value) { | ||
| LARGE_COST = static_cast<T>(large_cost_d); | ||
| } else { | ||
| // Clamp to a safe large value for integer types. | ||
| using Lim = std::numeric_limits<T>; | ||
| double cap = std::min<double>(static_cast<double>(Lim::max()) / 2.0, large_cost_d); | ||
| LARGE_COST = static_cast<T>(cap); | ||
| } | ||
| // Build square cost matrix with ZERO padding for dummy rows/columns (fast for rectangular). | ||
| // Only set LARGE_COST for forbidden entries inside the original MxN region. | ||
| std::vector<std::vector<T>> costs(n, std::vector<T>(n, T(0))); | ||
| for (int i = 0; i < nrows; ++i) { | ||
| T *cptr = data + i * ncols; | ||
| for (int j = 0; j < ncols; ++j) { | ||
| const T c = cptr[j]; | ||
| if (std::isfinite((double)c)) | ||
| costs[i][j] = c; | ||
| // For floats: non-finite => forbidden. For integers: always finite => allowed. | ||
| bool finite = std::isfinite(static_cast<double>(c)); | ||
| costs[i][j] = finite ? c : LARGE_COST; | ||
| } | ||
| } | ||
| // Note: | ||
| // - If nrows < ncols, rows [nrows..n-1] remain zero => dummy rows. | ||
| // - If ncols < nrows, cols [ncols..n-1] remain zero => dummy columns. | ||
| // This avoids filling the entire padded area with LARGE_COST and speeds up rectangular cases. | ||
@@ -82,12 +105,11 @@ std::vector<int> Lmate, Rmate; | ||
| std::vector<int> rowids, colids; | ||
| T total_cost = 0; // NEW: accumulate total assignment cost | ||
| T total_cost = T(0); | ||
| for (int i = 0; i < nrows; i++) | ||
| { | ||
| // Collect only real (row, col) matches. Exclude dummy columns (j >= ncols) and forbidden. | ||
| for (int i = 0; i < nrows; ++i) { | ||
| int mate = Lmate[i]; | ||
| if (Lmate[i] < ncols && costs[i][mate] != LARGE_COST) | ||
| { | ||
| if (mate >= 0 && mate < ncols && costs[i][mate] != LARGE_COST) { | ||
| rowids.push_back(i); | ||
| colids.push_back(mate); | ||
| total_cost += costs[i][mate]; // Sum up the cost for each assigned pair | ||
| total_cost = static_cast<T>(static_cast<double>(total_cost) + static_cast<double>(costs[i][mate])); | ||
| } | ||
@@ -97,6 +119,8 @@ } | ||
| if (return_cost) | ||
| return py::make_tuple(total_cost, py::array(rowids.size(), rowids.data()), py::array(colids.size(), colids.data())); | ||
| return py::make_tuple(total_cost, | ||
| py::array(rowids.size(), rowids.data()), | ||
| py::array(colids.size(), colids.data())); | ||
| else | ||
| return py::make_tuple(py::array(rowids.size(), rowids.data()), py::array(colids.size(), colids.data())); | ||
| return py::make_tuple(py::array(rowids.size(), rowids.data()), | ||
| py::array(colids.size(), colids.data())); | ||
| } |
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import os | ||
| import numpy as np | ||
| from typing import List, Tuple, Union | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from .lapjvs import lapjvs as _lapjvs_single | ||
| from .lapjvs import lapjvsa as _lapjvsa_single | ||
| def _normalize_threads(n_threads: int) -> int: | ||
| if n_threads is None or n_threads == 0: | ||
| cpu = os.cpu_count() or 1 | ||
| return max(1, int(cpu)) | ||
| return max(1, int(n_threads)) | ||
def _solve_one_jvs(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    jvx_like: bool,
    return_cost: bool,
):
    """Solve one 2D cost matrix via the single-instance JVS wrapper.

    The wrapper releases the GIL internally, so these calls can safely be
    fanned out across a thread pool.
    """
    return _lapjvs_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        jvx_like=jvx_like,
        prefer_float32=prefer_float32,
    )
def _solve_one_jvsa(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    return_cost: bool,
):
    """Solve one 2D cost matrix via the single-instance JVS pairs wrapper."""
    return _lapjvsa_single(
        a2d,
        extend_cost=extend_cost,
        return_cost=return_cost,
        prefer_float32=prefer_float32,
    )
def lapjvs_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """
    Stable batched JVS built on the single-instance solver with a thread pool.

    Parameters
    ----------
    costs : array-like, shape (B, N, M)
        Batch of cost matrices (float32/float64).
    extend_cost : bool
        Pad rectangular inputs to square.
    return_cost : bool
        If True, return per-matrix totals (B,) first.
    n_threads : int
        0 -> os.cpu_count(), else exact number of threads.
    prefer_float32 : bool
        Forwarded to the single-instance kernel.

    Returns
    -------
    (totals, rows_list, cols_list) if return_cost else (rows_list, cols_list).
    """
    batch = np.asarray(costs)
    if batch.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    n_items = batch.shape[0]
    n_workers = _normalize_threads(n_threads)
    totals = np.empty((n_items,), dtype=np.float64) if return_cost else None
    rows_out: List[np.ndarray] = [None] * n_items  # type: ignore
    cols_out: List[np.ndarray] = [None] * n_items  # type: ignore

    def _task(bi: int):
        # The single-instance wrapper releases the GIL internally.
        res = _solve_one_jvs(
            batch[bi], extend_cost=extend_cost, prefer_float32=prefer_float32,
            jvx_like=True, return_cost=return_cost,
        )
        if return_cost:
            total, rows, cols = res
            return bi, total, rows, cols
        rows, cols = res
        return bi, None, rows, cols

    def _store(result) -> None:
        bi, total, rows, cols = result
        if return_cost:
            totals[bi] = float(total)  # type: ignore
        rows_out[bi] = np.asarray(rows, dtype=np.int64)
        cols_out[bi] = np.asarray(cols, dtype=np.int64)

    if n_workers == 1 or n_items == 1:
        # Serial path (still GIL-free inside the solver).
        for bi in range(n_items):
            _store(_task(bi))
    else:
        # Parallel path: a thread pool is fine because the solver drops the GIL.
        with ThreadPoolExecutor(max_workers=n_workers) as pool:
            pending = [pool.submit(_task, bi) for bi in range(n_items)]
            for fut in as_completed(pending):
                _store(fut.result())

    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_out, cols_out  # type: ignore
    return rows_out, cols_out
def lapjvsa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """
    Stable batched JVS (pairs API) built on the single-instance wrapper with a thread pool.

    Parameters
    ----------
    costs : array-like, shape (B, N, M)
        Batch of cost matrices.
    extend_cost : bool
        Pad rectangular inputs to square.
    return_cost : bool
        If True, return per-matrix totals (B,) first.
    n_threads : int
        0 -> os.cpu_count(), else exact number of threads.
    prefer_float32 : bool
        Forwarded to the single-instance kernel.

    Returns
    -------
    if return_cost:
        (totals: (B,), pairs_list: List[(K_b, 2)])
    else:
        pairs_list: List[(K_b, 2)]
        # NOTE: the previous docstring advertised a 1-tuple ``(pairs_list,)``,
        # but the implementation returns the bare list; documented accordingly.
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        # Solve one matrix; the single-instance wrapper releases the GIL.
        a2d = A[bi]
        if return_cost:
            total, pairs = _solve_one_jvsa(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=True
            )
            return bi, total, pairs
        pairs = _solve_one_jvsa(
            a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=False
        )
        return bi, None, pairs

    if threads == 1 or B == 1:
        # Serial path avoids pool overhead for trivial batches.
        for bi in range(B):
            idx, t, P = work(bi)
            if return_cost:
                totals[idx] = float(t)  # type: ignore
            pairs_list[idx] = np.asarray(P, dtype=np.int64)
    else:
        with ThreadPoolExecutor(max_workers=threads) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, P = fut.result()
                if return_cost:
                    totals[idx] = float(t)  # type: ignore
                pairs_list[idx] = np.asarray(P, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
-228
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import numpy as np | ||
| from typing import Optional, Tuple, Union | ||
| from ._lapjvs import lapjvs_native as _lapjvs_native | ||
| from ._lapjvs import lapjvs_float32 as _lapjvs_float32 | ||
| from ._lapjvs import lapjvsa_native as _lapjvsa_native | ||
| from ._lapjvs import lapjvsa_float32 as _lapjvsa_float32 | ||
def lapjvs(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    jvx_like: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray]
]:
    """
    Solve the Linear Assignment Problem using the 'lapjvs' algorithm.

    - Accepts rectangular cost matrices (pad to square internally).
    - Returns lapjv-like (x, y) or jvx-like (rows, cols).
    - Optional prefer_float32 to reduce memory bandwidth.

    Parameters
    ----------
    cost : np.ndarray
        Cost matrix of shape (n, m). dtype should be a floating type.
    extend_cost : Optional[bool]
        If True, treat rectangular inputs by extending (zero-padding) to square.
        If False, require square input.
        If None (default), auto-detect: extend if n != m.
    return_cost : bool
        If True (default), return total cost as the first element of the return tuple.
    jvx_like : bool
        If True (default), return lapjvx/SciPy-style index arrays:
        - return_cost=True  -> (total_cost, row_indices, col_indices)
        - return_cost=False -> (row_indices, col_indices)
        If False, return lapjv-style mapping vectors:
        - return_cost=True  -> (total_cost, x, y)
        - return_cost=False -> (x, y)
        (Fixed: the docstring previously claimed False was the default, which
        contradicted the signature default of True.)
    prefer_float32 : bool
        If True (default), run the float32 kernel. The float64 kernel is used
        only when prefer_float32 is explicitly False AND the input is float64.

    Notes
    -----
    - The solver kernel may run in float32 (when prefer_float32=True) or native float64,
      but the returned total cost is always recomputed from the ORIGINAL input array
      to preserve previous numeric behavior and parity with lapjv/lapjvx.
    - For rectangular inputs, zero-padding exactly models the rectangular LAP.
    """
    # Keep the original array to compute the final cost from it (preserves
    # previous numeric behavior regardless of the kernel's working dtype).
    a = np.asarray(cost)
    if a.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n, m = a.shape
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    # Choose backend and working dtype for the solver only (ensure contiguity
    # to avoid hidden copies).
    use_float32_kernel = not ((prefer_float32 is False) and (a.dtype == np.float64))
    if use_float32_kernel:
        # Run float32 kernel (casting as needed).
        _kernel = _lapjvs_float32
        work = np.ascontiguousarray(a, dtype=np.float32)
    else:
        # Run native kernel on float64 inputs.
        _kernel = _lapjvs_native
        work = np.ascontiguousarray(a, dtype=np.float64)

    def _rows_cols_from_x(x_vec: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        # Convert a lapjv-style x vector into (rows, cols) index arrays,
        # dropping unassigned (-1) rows.
        if x_vec.size == 0:
            return np.empty((0,), dtype=np.int64), np.empty((0,), dtype=np.int64)
        mask = x_vec >= 0
        rows = np.nonzero(mask)[0]
        cols = x_vec[mask]
        return rows.astype(np.int64, copy=False), cols.astype(np.int64, copy=False)

    if not extend:
        # Square: call solver directly on chosen dtype, but compute total
        # from the ORIGINAL array.
        x_raw_obj, y_raw_obj = _kernel(work)
        # Convert only what's needed; y conversion deferred if not used.
        x_raw = np.asarray(x_raw_obj, dtype=np.int64)
        if jvx_like:
            rows, cols = _rows_cols_from_x(x_raw)
            if return_cost:
                # NOTE(review): assumes every row of a square problem is
                # assigned (x_raw >= 0); a -1 entry would index column -1.
                total = float(a[np.arange(n), x_raw].sum()) if n > 0 else 0.0
                return total, rows, cols
            else:
                return rows, cols
        else:
            y_raw = np.asarray(y_raw_obj, dtype=np.int64)
            if return_cost:
                total = float(a[np.arange(n), x_raw].sum()) if n > 0 else 0.0
                return total, x_raw, y_raw
            else:
                return x_raw, y_raw
    # Rectangular: zero-pad to square, solve, then trim back; compute total
    # from the ORIGINAL array.
    size = max(n, m)
    padded = np.empty((size, size), dtype=work.dtype)
    padded[:n, :m] = work
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    x_pad_obj, y_pad_obj = _kernel(padded)
    x_pad = np.asarray(x_pad_obj, dtype=np.int64)
    cols_pad_n = x_pad[:n]
    if jvx_like:
        if n == 0 or m == 0:
            rows = np.empty((0,), dtype=np.int64)
            cols = np.empty((0,), dtype=np.int64)
            total = 0.0
        else:
            # Keep only real matches: assigned columns inside the original rectangle.
            mask_r = (cols_pad_n >= 0) & (cols_pad_n < m)
            rows = np.nonzero(mask_r)[0].astype(np.int64, copy=False)
            cols = cols_pad_n[mask_r].astype(np.int64, copy=False)
            total = float(a[rows, cols].sum()) if (return_cost and rows.size) else 0.0
        return (total, rows, cols) if return_cost else (rows, cols)
    # lapjv-like outputs (vectorized); dummy matches map to -1.
    x_out = np.full(n, -1, dtype=np.int64)
    mask_r = (cols_pad_n >= 0) & (cols_pad_n < m)
    if mask_r.any():
        x_out[mask_r] = cols_pad_n[mask_r]
    y_pad = np.asarray(y_pad_obj, dtype=np.int64)
    rows_pad_m = y_pad[:m]
    y_out = np.full(m, -1, dtype=np.int64)
    mask_c = (rows_pad_m >= 0) & (rows_pad_m < n)
    if mask_c.any():
        y_out[mask_c] = rows_pad_m[mask_c]
    if return_cost and n > 0 and m > 0:
        mask = (x_out >= 0)
        total = float(a[np.nonzero(mask)[0], x_out[mask]].sum()) if mask.any() else 0.0
    else:
        total = 0.0
    return (total, x_out, y_out) if return_cost else (x_out, y_out)
def lapjvsa(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    prefer_float32: bool = True,
) -> Union[
    Tuple[float, np.ndarray],
    np.ndarray
]:
    """
    Solve LAP using the 'lapjvs' algorithm (pairs API).

    - Accepts rectangular cost matrices (pads to square internally when needed).
    - Returns pairs array (K, 2) int64 with optional total cost (computed from
      the ORIGINAL input array, not the padded/cast working copy).

    Parameters
    ----------
    cost : np.ndarray
        2D cost matrix of shape (n, m).
    extend_cost : Optional[bool]
        True  -> zero-pad rectangular input to square.
        False -> pass the input through unpadded (assumed square).
        None  -> auto-detect: pad only when n != m.
    return_cost : bool
        If True (default), return (total_cost, pairs); otherwise just pairs.
    prefer_float32 : bool
        If True (default), run the float32 kernel. The native float64 kernel is
        used only when prefer_float32 is explicitly False AND input is float64.
    """
    a = np.asarray(cost)
    if a.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n, m = a.shape
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    if not extend:
        # Square: call native pairs entry point on chosen dtype
        use_f32 = not ((prefer_float32 is False) and (a.dtype == np.float64))
        if use_f32:
            pairs_obj = _lapjvsa_float32(np.ascontiguousarray(a, dtype=np.float32))
        else:
            # Follow native dtype; if a is float32 or float64, both ok
            # Ensure contiguous to avoid hidden copies
            work = np.ascontiguousarray(a, dtype=a.dtype if a.dtype in (np.float32, np.float64) else np.float64)
            pairs_obj = _lapjvsa_native(work)
        pairs = np.asarray(pairs_obj, dtype=np.int64)
        if return_cost:
            if pairs.size:
                r = pairs[:, 0]; c = pairs[:, 1]
                # Sum from the ORIGINAL array to avoid float32 rounding drift.
                total = float(a[r, c].sum())
            else:
                total = 0.0
            return total, pairs
        return pairs
    # Rectangular: zero-pad to square, solve pairs on padded, map back, compute total from ORIGINAL
    size = max(n, m)
    use_f32 = not ((prefer_float32 is False) and (a.dtype == np.float64))
    wdtype = np.float32 if use_f32 else (a.dtype if a.dtype in (np.float32, np.float64) else np.float64)
    padded = np.empty((size, size), dtype=wdtype)
    # copy original submatrix
    padded[:n, :m] = a.astype(wdtype, copy=False)
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    # Solve pairs on the padded square matrix
    if use_f32:
        pairs_pad_obj = _lapjvsa_float32(padded)
    else:
        pairs_pad_obj = _lapjvsa_native(padded)
    pairs_pad = np.asarray(pairs_pad_obj, dtype=np.int64)
    # Trim pairs to original rectangle: matches that involve a dummy
    # (padded) row or column are dropped.
    if pairs_pad.size == 0 or n == 0 or m == 0:
        pairs = np.empty((0, 2), dtype=np.int64)
        total = 0.0
    else:
        r = pairs_pad[:, 0]
        c = pairs_pad[:, 1]
        mask = (r >= 0) & (r < n) & (c >= 0) & (c < m)
        if mask.any():
            pairs = np.stack([r[mask], c[mask]], axis=1).astype(np.int64, copy=False)
            total = float(a[pairs[:, 0], pairs[:, 1]].sum()) if return_cost else 0.0
        else:
            pairs = np.empty((0, 2), dtype=np.int64)
            total = 0.0
    return (total, pairs) if return_cost else pairs
| # Copyright (c) 2025 Ratha SIV | MIT License | ||
| import os | ||
| import numpy as np | ||
| from typing import List, Tuple, Union | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from ._lapjvx import lapjvx as _lapjvx_single | ||
| from ._lapjvx import lapjvxa as _lapjvxa_single | ||
| def _normalize_threads(n_threads: int) -> int: | ||
| if not n_threads: | ||
| return max(1, int(os.cpu_count() or 1)) | ||
| return max(1, int(n_threads)) | ||
def lapjvx_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
           Tuple[List[np.ndarray], List[np.ndarray]]]:
    """Solve a batch of LAPs with the lapjvx backend using a thread pool.

    costs is a (B, N, M) stack; extend_cost/cost_limit are forwarded to the
    single-instance solver.  Returns (totals, rows_list, cols_list) when
    return_cost, else (rows_list, cols_list).
    """
    batch = np.asarray(costs)
    if batch.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    n_items = batch.shape[0]
    n_workers = _normalize_threads(n_threads)
    totals = np.empty((n_items,), dtype=np.float64) if return_cost else None
    rows_out: List[np.ndarray] = [None] * n_items  # type: ignore
    cols_out: List[np.ndarray] = [None] * n_items  # type: ignore

    def _task(bi: int):
        res = _lapjvx_single(
            batch[bi], extend_cost=extend_cost,
            cost_limit=cost_limit, return_cost=return_cost,
        )
        if return_cost:
            total, rows, cols = res
            return bi, total, rows, cols
        rows, cols = res
        return bi, None, rows, cols

    def _store(result) -> None:
        bi, total, rows, cols = result
        if return_cost:
            totals[bi] = float(total)  # type: ignore
        rows_out[bi] = np.asarray(rows, dtype=np.int64)
        cols_out[bi] = np.asarray(cols, dtype=np.int64)

    if n_workers == 1 or n_items == 1:
        for bi in range(n_items):
            _store(_task(bi))
    else:
        with ThreadPoolExecutor(max_workers=n_workers) as pool:
            pending = [pool.submit(_task, bi) for bi in range(n_items)]
            for fut in as_completed(pending):
                _store(fut.result())

    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_out, cols_out  # type: ignore
    return rows_out, cols_out
def lapjvxa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """Solve a batch of LAPs with the lapjvx pairs backend using a thread pool.

    Returns (totals, pairs_list) when return_cost, else pairs_list, where each
    pairs entry is a (K_b, 2) int64 array of matched (row, col) indices.
    """
    batch = np.asarray(costs)
    if batch.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    n_items = batch.shape[0]
    n_workers = _normalize_threads(n_threads)
    totals = np.empty((n_items,), dtype=np.float64) if return_cost else None
    pairs_out: List[np.ndarray] = [None] * n_items  # type: ignore

    def _task(bi: int):
        res = _lapjvxa_single(
            batch[bi], extend_cost=extend_cost,
            cost_limit=cost_limit, return_cost=return_cost,
        )
        if return_cost:
            total, pairs = res
            return bi, total, pairs
        return bi, None, res

    def _store(result) -> None:
        bi, total, pairs = result
        if return_cost:
            totals[bi] = float(total)  # type: ignore
        pairs_out[bi] = np.asarray(pairs, dtype=np.int64)

    if n_workers == 1 or n_items == 1:
        for bi in range(n_items):
            _store(_task(bi))
    else:
        with ThreadPoolExecutor(max_workers=n_workers) as pool:
            pending = [pool.submit(_task, bi) for bi in range(n_items)]
            for fut in as_completed(pending):
                _store(fut.result())

    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_out  # type: ignore
    return pairs_out
-342
| # Copyright (c) 2012-2025 Tomas Kazmar | BSD-2-Clause License | ||
| import numpy as np | ||
| from bisect import bisect_left | ||
| # import logging | ||
| from ._lapjv import _lapmod, FP_DYNAMIC_ as FP_DYNAMIC, LARGE_ as LARGE | ||
def _pycrrt(n, cc, ii, kk, free_rows, x, y, v):
    """Column reduction and reduction transfer (pure-Python reference).

    Initializes the dual prices v, the row->col assignment x and the
    col->row assignment y for the sparse LAP given in CSR-like form
    (cc: values, ii: row start offsets, kk: sorted column indices).
    Indices of rows left unassigned are written to free_rows; their
    count is returned.
    """
    x[:] = -1
    y[:] = -1
    v[:] = LARGE
    # Column reduction: v[j] ends up as the column minimum and y[j] as a row
    # achieving it (later rows overwrite on strict improvement only).
    for i in range(n):
        ks = slice(ii[i], ii[i+1])
        js = kk[ks]
        ccs = cc[ks]
        mask = ccs < v[js]
        js = js[mask]
        v[js] = ccs[mask]
        y[js] = i
    unique = np.empty((n,), dtype=bool)
    unique[:] = True
    # Walk columns backwards so each minimizing row claims at most one column.
    for j in range(n-1, -1, -1):
        i = y[j]
        # If row is not taken yet, initialize it with the minimum stored in y.
        if x[i] < 0:
            x[i] = j
        else:
            # Row already holds another column: it is not uniquely assigned,
            # and column j goes back to the free pool.
            unique[i] = False
            y[j] = -1
    n_free_rows = 0
    for i in range(n):
        # Store unassigned row i.
        if x[i] < 0:
            free_rows[n_free_rows] = i
            n_free_rows += 1
        elif unique[i] and ii[i+1] - ii[i] > 1:
            # >1 check prevents choking on rows with a single entry
            # Reduction transfer from an assigned row: lower the price of its
            # assigned column by the row's second-smallest reduced cost.
            j = x[i]
            # Find the current 2nd minimum of the reduced column costs:
            # (cost[i,j] - v[j]) for some j.
            ks = slice(ii[i], ii[i+1])
            js = kk[ks]
            minv = np.min(cc[ks][js != j] - v[js][js != j])
            v[j] -= minv
    return n_free_rows
def find_minima(indices, values):
    """Return the two smallest entries of `values` with their column indices.

    Returns (j1, v1, j2, v2) where v1 <= v2 are the smallest and second
    smallest values and j1/j2 the corresponding entries of `indices`.
    j2 is -1 (and v2 is LARGE) when no second minimum has been seen.
    """
    if len(indices) > 0:
        best_j, best_v = indices[0], values[0]
    else:
        best_j, best_v = 0, LARGE
    second_j, second_v = -1, LARGE
    for j, h in zip(indices[1:], values[1:]):
        if h >= second_v:
            continue
        if h < best_v:
            # New overall minimum: the previous best becomes the runner-up.
            second_j, second_v = best_j, best_v
            best_j, best_v = j, h
        else:
            second_j, second_v = j, h
    return best_j, best_v, second_j, second_v
def _pyarr(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augmenting row reduction (pure-Python reference).

    Attempts to assign each currently free row to the column with the
    smallest reduced cost, adjusting dual prices; rows that remain
    contested are written back into free_rows.  Returns the number of
    rows still free afterwards.
    """
    current = 0
    new_free_rows = 0
    while current < n_free_rows:
        free_i = free_rows[current]
        current += 1
        ks = slice(ii[free_i], ii[free_i+1])
        js = kk[ks]
        # Two smallest reduced costs in this row and their columns.
        j1, v1, j2, v2 = find_minima(js, cc[ks] - v[js])
        i0 = y[j1]
        v1_new = v[j1] - (v2 - v1)
        v1_lowers = v1_new < v[j1]
        if v1_lowers:
            # Price at j1 can strictly drop: take j1 at the lowered price.
            v[j1] = v1_new
        elif i0 >= 0 and j2 != -1:  # i0 is assigned, try j2
            j1 = j2
            i0 = y[j2]
        x[free_i] = j1
        y[j1] = free_i
        if i0 >= 0:
            # We displaced row i0 from column j1.
            if v1_lowers:
                # Continue the augmenting chain with i0 immediately.
                current -= 1
                free_rows[current] = i0
            else:
                # Stop the chain here and keep i0 for the next sweep.
                free_rows[new_free_rows] = i0
                new_free_rows += 1
    return new_free_rows
def binary_search(data, key):
    """Locate `key` in the sorted sequence `data`.

    Returns the index of `key` if present, otherwise None.
    """
    pos = bisect_left(data, key)
    return pos if pos < len(data) and data[pos] == key else None
def _find(hi, d, cols, y):
    """Gather all columns whose tentative distance equals the running minimum.

    Scans cols[hi:] and swaps every column attaining the minimum of d to the
    front of that region.  Returns (minv, new_hi, cols); cols[old_hi:new_hi]
    become the new SCAN columns.  `y` is unused here (kept for signature
    parity with the other search helpers).
    """
    lo, hi = hi, hi + 1
    minv = d[cols[lo]]
    # XXX: anytime this happens to be NaN, i'm screwed...
    # assert not np.isnan(minv)
    for k in range(hi, len(cols)):
        j = cols[k]
        if d[j] <= minv:
            # New minimum found, trash the new SCAN columns found so far.
            if d[j] < minv:
                hi = lo
                minv = d[j]
            # Swap column j into the SCAN region (j == cols[k] here).
            cols[k], cols[hi] = cols[hi], j
            hi += 1
    return minv, hi, cols
def _scan(n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v):
    """Scan step of the shortest augmenting path search.

    Processes SCAN columns cols[lo:hi]; for each, relaxes the tentative
    distances d of TODO columns cols[hi:n] through the row currently
    assigned to the scanned column.  Returns (final_j, lo, hi, d, cols, pred):
    final_j >= 0 signals an unassigned column reachable at distance minv
    (search can stop, lo/hi returned as None); final_j == -1 means all SCAN
    columns are exhausted.
    """
    # Scan all TODO columns.
    while lo != hi:
        j = cols[lo]
        lo += 1
        i = y[j]
        # Rows store only their finite entries, so (i, j) may be absent;
        # locate it by binary search in the row's sorted column list.
        kj = binary_search(kk[ii[i]:ii[i+1]], j)
        if kj is None:
            continue
        kj = ii[i] + kj
        h = cc[kj] - v[j] - minv
        for k in range(hi, n):
            j = cols[k]
            kj = binary_search(kk[ii[i]:ii[i+1]], j)
            if kj is None:
                continue
            kj = ii[i] + kj
            cred_ij = cc[kj] - v[j] - h
            if cred_ij < d[j]:
                d[j] = cred_ij
                pred[j] = i
                if cred_ij == minv:
                    if y[j] < 0:
                        # Unassigned column at minimal distance: path complete.
                        return j, None, None, d, cols, pred
                    # Promote column j from TODO to SCAN.
                    cols[k] = cols[hi]
                    cols[hi] = j
                    hi += 1
    return -1, lo, hi, d, cols, pred
def find_path(n, cc, ii, kk, start_i, y, v):
    """Find a shortest augmenting path starting from free row start_i.

    Dijkstra-like search over columns.  Returns (final_j, pred): the
    unassigned column that terminates the path and the predecessor-row
    array used to backtrack it.  Dual prices v of READY columns are
    updated in place.
    """
    cols = np.arange(n, dtype=int)
    pred = np.empty((n,), dtype=int)
    pred[:] = start_i
    d = np.empty((n,), dtype=float)
    d[:] = LARGE
    # Seed distances with the reduced costs of start_i's stored entries.
    ks = slice(ii[start_i], ii[start_i+1])
    js = kk[ks]
    d[js] = cc[ks] - v[js]
    minv = LARGE
    lo, hi = 0, 0
    n_ready = 0
    final_j = -1
    while final_j == -1:
        # No SCAN columns, find new ones.
        if lo == hi:
            n_ready = lo
            minv, hi, cols = _find(hi, d, cols, y)
            for h in range(lo, hi):
                # If any of the new SCAN columns is unassigned, use it.
                if y[cols[h]] < 0:
                    final_j = cols[h]
        if final_j == -1:
            final_j, lo, hi, d, cols, pred = _scan(
                n, cc, ii, kk, minv, lo, hi, d, cols, pred, y, v)
    # Update prices for READY columns.
    for k in range(n_ready):
        j0 = cols[k]
        v[j0] += d[j0] - minv
    assert final_j >= 0
    assert final_j < n
    return final_j, pred
def _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v):
    """Augment the solution for every remaining free row.

    For each free row, finds a shortest augmenting path and flips the
    assignments along it, updating x and y in place.
    """
    for free_i in free_rows[:n_free_rows]:
        j, pred = find_path(n, cc, ii, kk, free_i, y, v)
        # Augment the path starting from column j and backtracking to free_i.
        i = -1
        while i != free_i:
            i = pred[j]
            assert i >= 0
            assert i < n
            y[j] = i
            # Row i takes column j; continue with the column i held before.
            j, x[i] = x[i], j
def check_cost(n, cc, ii, kk):
    """Validate a sparse cost matrix, raising ValueError on malformed input.

    Requires at least one row and one stored entry, non-negative values,
    and all values strictly below the LARGE sentinel.
    """
    if n == 0:
        raise ValueError('Cost matrix has zero rows.')
    if len(kk) == 0:
        raise ValueError('Cost matrix has zero columns.')
    if cc.min() < 0:
        raise ValueError('Cost matrix values must be non-negative.')
    if cc.max() >= LARGE:
        raise ValueError(
            'Cost matrix values must be less than %s' % LARGE)
def get_cost(n, cc, ii, kk, x0):
    """Total cost of assignment x0 (row i -> column x0[i]).

    Returns np.inf if any assigned pair is not a stored (finite) entry of
    the sparse matrix.
    """
    total = 0
    for i, j in enumerate(x0):
        kj = binary_search(kk[ii[i]:ii[i + 1]], j)
        if kj is None:
            return np.inf
        total += cc[ii[i] + kj]
    return total
def lapmod(n, cc, ii, kk, fast=True, return_cost=True, fp_version=FP_DYNAMIC):
    """Solve sparse linear assignment problem using Jonker-Volgenant algorithm.

    Parameters
    ----------
    n : int
        Number of rows of the assignment cost matrix.
    cc : 1D array
        All finite elements of the assignment cost matrix.
    ii : 1D array
        Indices of the row starts in cc. The following must hold:
        ii[0] == 0 and ii[n] == len(cc).
        (Fixed: previously documented as ii[n+1] == len(cc), off by one —
        the code reads row i via cc[ii[i]:ii[i+1]] for i in range(n).)
    kk : 1D array
        Column indices so that cost[i, kk[ii[i] + k]] == cc[ii[i] + k].
        Indices within one row must be sorted.
    fast : bool
        If True (default), use the compiled _lapmod kernel; otherwise run
        the pure-Python reference implementation (_pycrrt/_pyarr/_pya).
    return_cost : bool
        Whether or not to return the assignment cost.
    fp_version
        Forwarded to the compiled kernel (fast path only).

    Returns
    -------
    (opt, x, y) where opt is the assignment cost, x the columns assigned to
    rows and y the rows assigned to columns; or (x, y) if return_cost is not
    True.

    Note: this function takes no ``extend_cost``/``cost_limit`` parameters;
    the docstring previously described them in error (they belong to the
    dense lapjv API).
    """
    check_cost(n, cc, ii, kk)
    if fast is True:
        # Compiled kernel does the whole solve.
        x, y = _lapmod(n, cc, ii, kk, fp_version=fp_version)
    else:
        # Pure-Python reference path: normalize buffers for the helpers.
        cc = np.ascontiguousarray(cc, dtype=np.float64)
        ii = np.ascontiguousarray(ii, dtype=np.int32)
        kk = np.ascontiguousarray(kk, dtype=np.int32)
        x = np.empty((n,), dtype=np.int32)
        y = np.empty((n,), dtype=np.int32)
        v = np.empty((n,), dtype=np.float64)
        free_rows = np.empty((n,), dtype=np.int32)
        # Column reduction & reduction transfer.
        n_free_rows = _pycrrt(n, cc, ii, kk, free_rows, x, y, v)
        if n_free_rows == 0:
            # Reduction already produced a complete assignment.
            if return_cost is True:
                return get_cost(n, cc, ii, kk, x), x, y
            else:
                return x, y
        for it in range(2):
            # Augmenting row reduction, at most two sweeps.
            n_free_rows = _pyarr(
                n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
            if n_free_rows == 0:
                if return_cost is True:
                    return get_cost(n, cc, ii, kk, x), x, y
                else:
                    return x, y
        # Shortest augmenting paths for the remaining free rows.
        _pya(n, cc, ii, kk, n_free_rows, free_rows, x, y, v)
    if return_cost is True:
        return get_cost(n, cc, ii, kk, x), x, y
    else:
        return x, y
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
1659271
10.62%37
8.82%1433
48.19%