# Copyright (c) 2025 Ratha SIV | MIT License

import os
import numpy as np
from typing import List, Tuple, Union
from concurrent.futures import ThreadPoolExecutor, as_completed

# We build batch on top of the stable single-instance API (releases GIL inside)
from .lapjvs import lapjvs as _lapjvs_single
from .lapjvs import lapjvsa as _lapjvsa_single


def _normalize_threads(n_threads: int) -> int:
    if n_threads is None or n_threads == 0:
        cpu = os.cpu_count() or 1
        return max(1, int(cpu))
    return max(1, int(n_threads))


def _solve_one_jvs(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    jvx_like: bool,
    return_cost: bool,
):
    # Calls the proven single-instance wrapper; it releases the GIL internally.
    return _lapjvs_single(
        a2d, extend_cost=extend_cost, return_cost=return_cost,
        jvx_like=jvx_like, prefer_float32=prefer_float32
    )


def _solve_one_jvsa(
    a2d: np.ndarray,
    extend_cost: bool,
    prefer_float32: bool,
    return_cost: bool,
):
    return _lapjvsa_single(
        a2d, extend_cost=extend_cost, return_cost=return_cost,
        prefer_float32=prefer_float32
    )


def lapjvs_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[
    Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """
    Stable batched JVS built on the single-instance solver with a thread pool.

    - costs: (B, N, M) array-like (float32/float64)
    - extend_cost: pad rectangular inputs to square
    - return_cost: if True, returns totals (B,) first
    - n_threads: 0 -> os.cpu_count(), else exact number of threads
    - prefer_float32: forward to single-instance kernel

    Returns:
        if return_cost:
            (totals: (B,), rows_list: List[(K_b,)], cols_list: List[(K_b,)])
        else:
            (rows_list, cols_list)
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B, N, M = A.shape
    threads = _normalize_threads(n_threads)
    # Preallocate outputs
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    # Worker
    def work(bi: int):
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _solve_one_jvs(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
                jvx_like=True, return_cost=True
            )
            return bi, total, rows, cols
        else:
            rows, cols = _solve_one_jvs(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32,
                jvx_like=True, return_cost=False
            )
            return bi, None, rows, cols

    if threads == 1 or B == 1:
        # Serial path (still GIL-free inside the solver)
        for bi in range(B):
            idx, t, r, c = work(bi)
            if return_cost:
                totals[idx] = float(t)  # type: ignore
            rows_list[idx] = np.asarray(r, dtype=np.int64)
            cols_list[idx] = np.asarray(c, dtype=np.int64)
    else:
        # Parallel path (ThreadPool is OK because solver releases GIL)
        with ThreadPoolExecutor(max_workers=threads) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, r, c = fut.result()
                if return_cost:
                    totals[idx] = float(t)  # type: ignore
                rows_list[idx] = np.asarray(r, dtype=np.int64)
                cols_list[idx] = np.asarray(c, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    else:
        return rows_list, cols_list


def lapjvsa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    return_cost: bool = True,
    n_threads: int = 0,
    prefer_float32: bool = True,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """
    Stable batched JVS (pairs API) built on the single-instance wrapper with a thread pool.

    Returns:
        if return_cost:
            (totals: (B,), pairs_list: List[(K_b, 2)])
        else:
            pairs_list
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        a2d = A[bi]
        if return_cost:
            total, pairs = _solve_one_jvsa(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=True
            )
            return bi, total, pairs
        else:
            pairs = _solve_one_jvsa(
                a2d, extend_cost=extend_cost, prefer_float32=prefer_float32, return_cost=False
            )
            return bi, None, pairs

    if threads == 1 or B == 1:
        for bi in range(B):
            idx, t, P = work(bi)
            if return_cost:
                totals[idx] = float(t)  # type: ignore
            pairs_list[idx] = np.asarray(P, dtype=np.int64)
    else:
        with ThreadPoolExecutor(max_workers=threads) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                idx, t, P = fut.result()
                if return_cost:
                    totals[idx] = float(t)  # type: ignore
                pairs_list[idx] = np.asarray(P, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    else:
        return pairs_list
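For quick orientation, here is a minimal usage sketch of the two wrappers above (not part of the diff), assuming `lapx` v0.8.0 is installed and imported as `lap`; rectangular inputs need `extend_cost=True`, as in the README examples:

```python
import numpy as np
import lap

rng = np.random.default_rng(0)
batch = rng.random((8, 20, 30))  # (B, N, M) rectangular batch

# rows/cols style: one pair of (K_b,) index arrays per batch item
totals, rows, cols = lap.lapjvs_batch(batch, extend_cost=True, return_cost=True, n_threads=0)
print(totals.shape)                 # (8,)
print(rows[0].shape, cols[0].shape) # per-item index arrays

# pairs style: one (K_b, 2) array per batch item
totals_a, pairs = lap.lapjvsa_batch(batch, extend_cost=True, return_cost=True)
print(np.allclose(totals, totals_a))  # both styles report the same optimal totals
```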
# Copyright (c) 2025 Ratha SIV | MIT License

import os
import numpy as np
from typing import List, Tuple, Union
from concurrent.futures import ThreadPoolExecutor, as_completed

# Batched on top of the single-instance API; the solver releases the GIL internally
from ._lapjvx import lapjvx as _lapjvx_single
from ._lapjvx import lapjvxa as _lapjvxa_single


def _normalize_threads(n_threads: int) -> int:
    if not n_threads:
        return max(1, int(os.cpu_count() or 1))
    return max(1, int(n_threads))


def lapjvx_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray], List[np.ndarray]],
           Tuple[List[np.ndarray], List[np.ndarray]]]:
    """
    Batched lapjvx built on the single-instance solver with a thread pool.

    Returns:
        if return_cost:
            (totals: (B,), rows_list: List[(K_b,)], cols_list: List[(K_b,)])
        else:
            (rows_list, cols_list)
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    rows_list: List[np.ndarray] = [None] * B  # type: ignore
    cols_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        a2d = A[bi]
        if return_cost:
            total, rows, cols = _lapjvx_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, rows, cols
        else:
            rows, cols = _lapjvx_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
            )
            return bi, None, rows, cols

    if threads == 1 or B == 1:
        for bi in range(B):
            i, t, r, c = work(bi)
            if return_cost:
                totals[i] = float(t)  # type: ignore
            rows_list[i] = np.asarray(r, dtype=np.int64)
            cols_list[i] = np.asarray(c, dtype=np.int64)
    else:
        with ThreadPoolExecutor(max_workers=threads) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, r, c = fut.result()
                if return_cost:
                    totals[i] = float(t)  # type: ignore
                rows_list[i] = np.asarray(r, dtype=np.int64)
                cols_list[i] = np.asarray(c, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), rows_list, cols_list  # type: ignore
    return rows_list, cols_list


def lapjvxa_batch(
    costs: np.ndarray,
    extend_cost: bool = False,
    cost_limit: float = np.inf,
    return_cost: bool = True,
    n_threads: int = 0,
) -> Union[Tuple[np.ndarray, List[np.ndarray]], List[np.ndarray]]:
    """
    Batched lapjvxa (pairs API) built on the single-instance solver with a thread pool.

    Returns:
        if return_cost:
            (totals: (B,), pairs_list: List[(K_b, 2)])
        else:
            pairs_list
    """
    A = np.asarray(costs)
    if A.ndim != 3:
        raise ValueError("3-dimensional array expected [B, N, M]")
    B = A.shape[0]
    threads = _normalize_threads(n_threads)
    totals = np.empty((B,), dtype=np.float64) if return_cost else None
    pairs_list: List[np.ndarray] = [None] * B  # type: ignore

    def work(bi: int):
        a2d = A[bi]
        if return_cost:
            total, pairs = _lapjvxa_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=True
            )
            return bi, total, pairs
        else:
            pairs = _lapjvxa_single(
                a2d, extend_cost=extend_cost, cost_limit=cost_limit, return_cost=False
            )
            return bi, None, pairs

    if threads == 1 or B == 1:
        for bi in range(B):
            i, t, P = work(bi)
            if return_cost:
                totals[i] = float(t)  # type: ignore
            pairs_list[i] = np.asarray(P, dtype=np.int64)
    else:
        with ThreadPoolExecutor(max_workers=threads) as ex:
            futures = [ex.submit(work, bi) for bi in range(B)]
            for fut in as_completed(futures):
                i, t, P = fut.result()
                if return_cost:
                    totals[i] = float(t)  # type: ignore
                pairs_list[i] = np.asarray(P, dtype=np.int64)
    if return_cost:
        return np.asarray(totals, dtype=np.float64), pairs_list  # type: ignore
    return pairs_list
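Likewise, a small hedged sketch for the wrappers above; `cost_limit` is simply forwarded per item to the single-matrix `lapjvx()` (semantics as documented in `lap/__init__.py`), so per-item assignment counts `K_b` may vary when a finite limit is set:

```python
import numpy as np
import lap

rng = np.random.default_rng(1)
batch = rng.random((4, 50, 50))  # (B, N, M) square batch

# With a finite cost_limit, any single assignment costing more than the
# limit may be dropped, so each item can return fewer than N matches.
# (extend_cost=True mirrors the usual single-matrix cost_limit usage.)
totals, rows, cols = lap.lapjvx_batch(batch, extend_cost=True, cost_limit=0.5, return_cost=True)
for b in range(batch.shape[0]):
    print(b, rows[b].size, float(totals[b]))
```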
+1 −0
@@ -18,2 +18,3 @@ # 🏆 Quick Benchmark
python benchmark.py
python benchmark_batch.py
```
@@ -20,0 +21,0 @@
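For a rough sense of batch throughput without the repo's scripts, a tiny hypothetical timing harness (`benchmark.py`/`benchmark_batch.py` above are the repo's own scripts; this sketch is not theirs):

```python
import time
import numpy as np
import lap

batch = np.random.rand(200, 100, 100)  # (B, N, M)

t0 = time.perf_counter()
lap.lapjvx_batch(batch, return_cost=False, n_threads=0)  # 0 -> use all CPUs
t1 = time.perf_counter()
print(f"lapjvx_batch over {batch.shape[0]} matrices: {t1 - t0:.3f}s")
```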
+48 −17
@@ -6,7 +6,6 @@ # Copyright (c) 2025 Ratha SIV | MIT License
Common Parameters for lapjv, lapjvx, lapjvxa
--------------------------------------------
cost : (N,M) ndarray
    Cost matrix. Entry `cost[i, j]` is the cost of assigning row `i` to column `j`.
Single-matrix solvers — common parameters
-----------------------------------------
cost : (N, M) ndarray
    Cost matrix. Entry cost[i, j] is the cost of assigning row i to column j.
extend_cost : bool, optional
@@ -19,20 +18,47 @@ Whether or not to extend a non-square matrix. Default: False.
Provided solvers
----------------
Batch solvers — common parameters
---------------------------------
costs : (B, N, M) ndarray
    A batch of cost matrices. costs[b] is the cost matrix for batch item b.
extend_cost : bool, optional
    Whether to extend non-square matrices in the batch. Default: False.
cost_limit : float, optional
    An upper limit for the cost of a single assignment. Default: np.inf.
return_cost : bool, optional
    Whether to return the assignment costs per batch item.
n_threads : int, optional
    Number of threads to use. 0 selects a runtime default. Default: 0.
prefer_float32 : bool, optional
    Prefer the float32 kernel when possible to reduce memory bandwidth.
    Defaults to True for lapjvs-based batch solvers.

Provided solvers (single-matrix)
--------------------------------
- lapmod : Sparse assignment solver (for sparse cost matrices) by Tomas Kazmar's lap.
- lapjv : JV assignment solver by Tomas Kazmar's lap; returns JV-style mappings (x, y).
- lapjvx : Enhanced lapjv by lapx; returns with SciPy-like outputs (rows, cols).
- lapjvx : Enhanced lapjv by lapx; returns SciPy-like outputs (rows, cols).
- lapjvxa : Convenience wrapper of lapjvx by lapx; returns (K, 2) assignment pairs.
- lapjvc : Classic JV variant by Christoph Heindl's lapsolver; returns (rows, cols).
- lapjvs : Enhanced Vadim Markovtsev's lapjv by lapx; returns either style.
- lapjvsa : Convenience wrapper of lapjvs by lapx; returns (K, 2) assignment pairs.

Provided solvers (batch)
------------------------
- lapjvx_batch : Batched lapjvx; returns (totals, rows_list, cols_list) or (rows_list, cols_list).
- lapjvxa_batch : Batched lapjvxa; returns (totals, pairs_list) or pairs_list with (K_b, 2).
- lapjvs_batch : Batched lapjvs; returns (totals, rows_list, cols_list) or (rows_list, cols_list).
- lapjvsa_batch : Batched lapjvsa; returns (totals, pairs_list) or pairs_list with (K_b, 2).

Notes
-----
- All solvers in lapx handle both square and rectangular cost matrices.
- Output formats may differ by solver and input parameters.
- For details and benchmarks, see the official repo https://github.com/rathaROG/lapx .
- Batch solvers accept costs shaped (B, N, M) and return per-instance assignments.
- lapjvs and lapjvs_* wrappers may recompute the total cost from the original input
  for float32/float64 parity; this has negligible overhead.
- For details and benchmarks, see the official repo: https://github.com/rathaROG/lapx
"""
__version__ = '0.7.1'
__version__ = '0.8.0'

# Single-matrix solvers
from .lapmod import lapmod
@@ -46,9 +72,14 @@ from ._lapjv import (
)
from ._lapjvx import (
    lapjvx,
    lapjvxa
)
from ._lapjvx import lapjvx, lapjvxa
from ._lapjvc import lapjvc
from .lapjvs import lapjvs
from .lapjvs import lapjvs, lapjvsa

__all__ = ['lapmod', 'lapjv', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs', 'FP_1', 'FP_2', 'FP_DYNAMIC', 'LARGE']

# Batch solvers
from .lapjvx_batch import lapjvx_batch, lapjvxa_batch
from .lapjvs_batch import lapjvs_batch, lapjvsa_batch

__all__ = [
    'lapjv', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs', 'lapjvsa',
    'lapjvx_batch', 'lapjvxa_batch', 'lapjvs_batch', 'lapjvsa_batch',
    'lapmod', 'FP_1', 'FP_2', 'FP_DYNAMIC', 'LARGE'
]
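Because the batch solvers fan the single-matrix solvers out over a thread pool (see the Notes above), results can be cross-checked directly; a minimal sketch, assuming `lapx` v0.8.0:

```python
import numpy as np
import lap

rng = np.random.default_rng(2)
batch = rng.random((16, 40, 40))

totals, rows, cols = lap.lapjvx_batch(batch, return_cost=True, n_threads=4)

# Each batch item should reproduce the single-matrix result exactly,
# since the batch wrapper calls the same underlying kernel per item.
for b in range(batch.shape[0]):
    t, r, c = lap.lapjvx(batch[b], return_cost=True)
    assert np.isclose(t, totals[b])
    assert np.array_equal(np.asarray(r, dtype=np.int64), rows[b])
    assert np.array_equal(np.asarray(c, dtype=np.int64), cols[b])
print("single vs batch: consistent")
```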
+79 −0
@@ -8,2 +8,4 @@ # Copyright (c) 2025 Ratha SIV | MIT License
from ._lapjvs import lapjvs_float32 as _lapjvs_float32
from ._lapjvs import lapjvsa_native as _lapjvsa_native
from ._lapjvs import lapjvsa_float32 as _lapjvsa_float32
@@ -145,1 +147,78 @@
    return (total, x_out, y_out) if return_cost else (x_out, y_out)
def lapjvsa(
    cost: np.ndarray,
    extend_cost: Optional[bool] = None,
    return_cost: bool = True,
    prefer_float32: bool = True,
):
    """
    Solve LAP using the 'lapjvs' algorithm (pairs API).

    - Accepts rectangular cost matrices (pads to square internally when needed).
    - Returns a (K, 2) int64 pairs array, with an optional total cost computed
      from the ORIGINAL (unpadded) input.
    """
    a = np.asarray(cost)
    if a.ndim != 2:
        raise ValueError("cost must be a 2D array")
    n, m = a.shape
    extend = (n != m) if (extend_cost is None) else bool(extend_cost)
    if not extend:
        # Square: call the native pairs entry point on the chosen dtype
        use_f32 = not ((prefer_float32 is False) and (a.dtype == np.float64))
        if use_f32:
            pairs_obj = _lapjvsa_float32(np.ascontiguousarray(a, dtype=np.float32))
        else:
            # Follow the native dtype (float32 and float64 are both accepted);
            # ensure contiguity to avoid hidden copies
            work = np.ascontiguousarray(a, dtype=a.dtype if a.dtype in (np.float32, np.float64) else np.float64)
            pairs_obj = _lapjvsa_native(work)
        pairs = np.asarray(pairs_obj, dtype=np.int64)
        if return_cost:
            if pairs.size:
                r = pairs[:, 0]
                c = pairs[:, 1]
                total = float(a[r, c].sum())
            else:
                total = 0.0
            return total, pairs
        return pairs
    # Rectangular: zero-pad to square, solve pairs on the padded matrix,
    # map back, and compute the total from the ORIGINAL input
    size = max(n, m)
    use_f32 = not ((prefer_float32 is False) and (a.dtype == np.float64))
    wdtype = np.float32 if use_f32 else (a.dtype if a.dtype in (np.float32, np.float64) else np.float64)
    padded = np.empty((size, size), dtype=wdtype)
    # Copy the original submatrix
    padded[:n, :m] = a.astype(wdtype, copy=False)
    if m < size:
        padded[:n, m:] = 0
    if n < size:
        padded[n:, :] = 0
    # Solve pairs on the padded square matrix
    if use_f32:
        pairs_pad_obj = _lapjvsa_float32(padded)
    else:
        pairs_pad_obj = _lapjvsa_native(padded)
    pairs_pad = np.asarray(pairs_pad_obj, dtype=np.int64)
    # Trim pairs to the original rectangle
    if pairs_pad.size == 0 or n == 0 or m == 0:
        pairs = np.empty((0, 2), dtype=np.int64)
        total = 0.0
    else:
        r = pairs_pad[:, 0]
        c = pairs_pad[:, 1]
        mask = (r >= 0) & (r < n) & (c >= 0) & (c < m)
        if mask.any():
            pairs = np.stack([r[mask], c[mask]], axis=1).astype(np.int64, copy=False)
            total = float(a[pairs[:, 0], pairs[:, 1]].sum()) if return_cost else 0.0
        else:
            pairs = np.empty((0, 2), dtype=np.int64)
            total = 0.0
    return (total, pairs) if return_cost else pairs
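A small sketch illustrating the docstring's guarantee above, namely that the reported total is computed from the ORIGINAL (unpadded) matrix; it assumes this module is importable via `lap` (v0.8.0):

```python
import numpy as np
import lap

rng = np.random.default_rng(3)
cost = rng.random((100, 150))  # rectangular: lapjvsa pads to square internally

total, pairs = lap.lapjvsa(cost, return_cost=True)
assert pairs.shape[1] == 2 and pairs.dtype == np.int64

# Recompute the total from the original (unpadded) entries the pairs select.
recomputed = float(cost[pairs[:, 0], pairs[:, 1]].sum())
print(np.isclose(total, recomputed))  # True
```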
+126 −35
Metadata-Version: 2.4
Name: lapx
Version: 0.7.1
Summary: Linear assignment problem solvers
Version: 0.8.0
Summary: Linear assignment problem solvers, including single and batch solvers.
Home-page: https://github.com/rathaROG/lapx
@@ -9,3 +9,3 @@ Author: Ratha SIV
License: MIT
Keywords: Linear Assignment Problem Solver,LAP solver,Jonker-Volgenant Algorithm,LAPJV,LAPMOD,lap,lapx,lapjvx,lapjvxa,lapjvc,lapjvs
Keywords: Linear Assignment Problem Solver,LAP solver,Jonker-Volgenant Algorithm,LAPJV,LAPMOD,lap,lapx,lapjvx,lapjvxa,lapjvc,lapjvs,lapjvsa,lapjvx_batch,lapjvxa_batch,lapjvs_batch,lapjvsa_batch
Classifier: Development Status :: 4 - Beta
@@ -55,5 +55,6 @@ Classifier: Environment :: Console
- 2025/10/21: `lapx` [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) introduced **`lapjvs()`**.
- 2025/10/16: `lapx` [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) introduced **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**.
- 2025/10/15: Added Python 3.14 support and [more](https://github.com/rathaROG/lapx/pull/15).
- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) introduced **`lapjvsa()`**, **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`** and **`lapjvsa_batch()`**.
- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) introduced **`lapjvs()`**.
- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) introduced **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**.
- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support.
- 2024/12/01: The original [`lap`](https://github.com/gatagat/lap) and [`lapx`](https://github.com/rathaROG/lapx) have been merged.
@@ -72,14 +73,10 @@
`lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap), but has since evolved to offer much more.
[`lapx`](https://github.com/rathaROG/lapx) supports them all: Single ✓ Batch ✓ Square ✓ Rectangular ✓.
`lapx` features the original **`lapjv()`** and **`lapmod()`** functions, and since [**v0.6.0**](https://github.com/rathaROG/lapx/releases/tag/v0.6.0), `lapx` has introduced three additional assignment solvers:
- **`lapjvx()`** and **`lapjvxa()`** — enhanced versions of [`lap.lapjv()`](https://github.com/gatagat/lap) with more flexible output formats
- **`lapjvc()`** — an enhanced version of Christoph Heindl’s [`lapsolver.solve_dense()`](https://github.com/cheind/py-lapsolver) with unified output formats
`lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more; see the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions.
`lapx` [**v0.7.0**](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) introduced a new function: **`lapjvs()`** — an enhanced version of Vadim Markovtsev’s [`lapjv()`](https://github.com/src-d/lapjv), supporting both rectangular and square cost matrices with flexible output styles.
<details><summary>Click to read more ...</summary><br>
<details><summary>Read more</summary><br>
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on the ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch, based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³.
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on the Jonker-Volgenant algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch, based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³.
<sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br>
@@ -107,7 +104,6 @@ <sup>² A. Volgenant, "Linear and Semi-Assignment Problems: A Core Oriented Approach", Computer Ops Res. 23, 917-932 (1996) </sup><br>
| Python 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 |
| Python 3.9-3.14 ¹˒² | AMD64/ARM64 ³ | x86_64/aarch64 | x86_64/arm64 |
| Python 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 |
<sup>¹ `lapx` v0.5.13+ supports numpy 1.x-2.x and Python 3.14. 🆕 </sup><br>
<sup>² Pre-built wheels for Python 3.13+ do not support free-threading.</sup><br>
<sup>³ Windows ARM64 is experimental.</sup><br>
<sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br>
<sup>² ⚠️ Windows ARM64 is experimental. </sup><br>
@@ -138,4 +134,6 @@
### 1. The original function ``lapjv()``
### 🅰️ Single-matrix Solvers 📄

#### 1. The original function ``lapjv()``
As with `lap`, use `import lap`; for example:
@@ -146,4 +144,4 @@
# x, y = lap.lapjv(np.random.rand(4, 5), extend_cost=True, return_cost=False)
total_cost, x, y = lap.lapjv(np.random.rand(4, 5), extend_cost=True, return_cost=True)
# x, y = lap.lapjv(np.random.rand(100, 150), extend_cost=True, return_cost=False)
total_cost, x, y = lap.lapjv(np.random.rand(100, 150), extend_cost=True, return_cost=True)
assignments = np.array([[y[i], i] for i in x if i >= 0])
@@ -183,3 +181,3 @@ ```
### 2. The new function ``lapjvx()``
#### 2. The new function ``lapjvx()``
@@ -191,4 +189,4 @@ `lapjvx()` basically is `lapjv()`, but it matches the return style of SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html) with no additional overhead. You can see how it compares to others in the Object Tracking benchmark [here](https://github.com/rathaROG/lapx/blob/main/benchmark.md#-object-tracking).
# row_indices, col_indices = lap.lapjvx(np.random.rand(4, 5), extend_cost=True, return_cost=False)
total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(4, 5), extend_cost=True, return_cost=True)
# row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=False)
total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True)
assignments = np.array(list(zip(row_indices, col_indices)))
@@ -199,3 +197,3 @@ ```
### 3. The new function ``lapjvxa()``
#### 3. The new function ``lapjvxa()``
@@ -207,4 +205,4 @@ `lapjvxa()` is essentially the same as `lapjvx()`, but it returns assignments with shape `(K, 2)` directly — no additional or manual post-processing required. `lapjvxa()` is optimized for applications that only need the final assignments and do not require control over the `cost_limit` parameter.
# assignments = lap.lapjvxa(np.random.rand(4, 5), extend_cost=True, return_cost=False)
total_cost, assignments = lap.lapjvxa(np.random.rand(4, 5), extend_cost=True, return_cost=True)
# assignments = lap.lapjvxa(np.random.rand(100, 150), extend_cost=True, return_cost=False)
total_cost, assignments = lap.lapjvxa(np.random.rand(100, 150), extend_cost=True, return_cost=True)
```
@@ -216,3 +214,3 @@
### 4. The new function ``lapjvc()``
#### 4. The new function ``lapjvc()``
@@ -224,4 +222,4 @@ `lapjvc()` is an enhanced version of Christoph Heindl's [py-lapsolver](https://github.com/cheind/py-lapsolver). `lapjvc()` is as fast as (if not faster than) other functions when `n=m` (the cost matrix is square), but it is much slower when `n≠m` (the cost matrix is rectangular). This function adopts the return style of `lapjvx()` — the same as SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html).
# row_indices, col_indices = lap.lapjvc(np.random.rand(4, 5), return_cost=False)
total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(4, 5), return_cost=True)
# row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=False)
total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True)
assignments = np.array(list(zip(row_indices, col_indices)))
@@ -234,3 +232,3 @@ ```
### 5. The new function ``lapjvs()``
#### 5. The new function ``lapjvs()``
@@ -242,4 +240,4 @@ `lapjvs()` is an enhanced version of Vadim Markovtsev's [`lapjv`](https://github.com/src-d/lapjv). While `lapjvs()` does not use CPU special instruction sets like the original implementation, it still delivers comparable performance. It natively supports both square and rectangular cost matrices and can produce output either in SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html) style or `(x, y)` mappings. See the [docstring here](https://github.com/rathaROG/lapx/blob/main/lap/lapjvs.py) for more details.
# row_indices, col_indices = lap.lapjvs(np.random.rand(4, 5), return_cost=False, jvx_like=True)
total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(4, 5), return_cost=True, jvx_like=True)
# row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=False, jvx_like=True)
total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True)
assignments = np.array(list(zip(row_indices, col_indices)))
@@ -250,5 +248,20 @@ ```
<details><summary>Show <code>lapjvsa()</code></summary>

#### 6. The new function ``lapjvsa()``
`lapjvsa()` is essentially the same as `lapjvs()`, but it returns assignments with shape `(K, 2)` directly — no additional or manual post-processing required.
```python
import numpy as np, lap
# assignments = lap.lapjvsa(np.random.rand(100, 150), return_cost=False)
total_cost, assignments = lap.lapjvsa(np.random.rand(100, 150), return_cost=True)
```
</details>

<details><summary>Show <code>lapmod()</code></summary>

### 6. The original function ``lapmod()``
#### 7. The original function ``lapmod()``
@@ -260,3 +273,3 @@ See the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details.
n, m = 5000, 5000
n, m = 1000, 1000
cm = np.random.rand(n, m)
@@ -277,2 +290,80 @@

### 🅱️ Batch Solvers 🗂️

#### 1. The new function ``lapjvx_batch()``
`lapjvx_batch()` is the batch version of [`lapjvx()`](https://github.com/rathaROG/lapx#2-the-new-function-lapjvx), accepting costs with shape `(B, N, M)`. See the **common parameters** and **returns** in [`lap/__init__.py`](https://github.com/rathaROG/lapx/blob/main/lap/__init__.py).
```python
import numpy as np, lap, os
batch_costs = np.random.rand(500, 100, 150)  # (B, N, M); B is the batch size
costs, rows, cols = lap.lapjvx_batch(batch_costs, extend_cost=True, return_cost=True, n_threads=os.cpu_count())
print(f"total costs = {costs.sum()}")
# Access the assignments for batch item b = 7
rows_7 = rows[7]  # 1D array of row indices
cols_7 = cols[7]  # 1D array of col indices
assignments_7 = np.column_stack((rows_7, cols_7))  # (K_b, 2)
print(f"assignments_7.shape = {assignments_7.shape}")
```

<details><summary>Show <code>lapjvxa_batch()</code></summary>

#### 2. The new function ``lapjvxa_batch()``
`lapjvxa_batch()` is the batch version of [`lapjvxa()`](https://github.com/rathaROG/lapx#3-the-new-function-lapjvxa), accepting costs with shape `(B, N, M)`. See the **common parameters** and **returns** in [`lap/__init__.py`](https://github.com/rathaROG/lapx/blob/main/lap/__init__.py).
```python
import numpy as np, lap, os
batch_costs = np.random.rand(500, 100, 150)  # (B, N, M); B is the batch size
costs, assignments = lap.lapjvxa_batch(batch_costs, extend_cost=True, return_cost=True, n_threads=os.cpu_count())
print(f"total costs = {costs.sum()}")
print(f"len(assignments) = {len(assignments)}")
print(f"assignments[0].shape = {assignments[0].shape}")
```
</details>

<details><summary>Show <code>lapjvs_batch()</code></summary>

#### 3. The new function ``lapjvs_batch()``
`lapjvs_batch()` is the batch version of [`lapjvs()`](https://github.com/rathaROG/lapx#5-the-new-function-lapjvs), accepting costs with shape `(B, N, M)`. See the **common parameters** and **returns** in [`lap/__init__.py`](https://github.com/rathaROG/lapx/blob/main/lap/__init__.py).
```python
import numpy as np, lap, os
batch_costs = np.random.rand(500, 100, 150)  # (B, N, M); B is the batch size
costs, rows, cols = lap.lapjvs_batch(batch_costs, extend_cost=True, return_cost=True, n_threads=os.cpu_count())
print(f"total costs = {costs.sum()}")
# Access the assignments for batch item b = 7
rows_7 = rows[7]  # 1D array of row indices
cols_7 = cols[7]  # 1D array of col indices
assignments_7 = np.column_stack((rows_7, cols_7))  # (K_b, 2)
print(f"assignments_7.shape = {assignments_7.shape}")
```
</details>

<details><summary>Show <code>lapjvsa_batch()</code></summary>

#### 4. The new function ``lapjvsa_batch()``
`lapjvsa_batch()` is the batch version of [`lapjvsa()`](https://github.com/rathaROG/lapx#6-the-new-function-lapjvsa), accepting costs with shape `(B, N, M)`. See the **common parameters** and **returns** in [`lap/__init__.py`](https://github.com/rathaROG/lapx/blob/main/lap/__init__.py).
```python
import numpy as np, lap, os
batch_costs = np.random.rand(500, 100, 150)  # (B, N, M); B is the batch size
costs, assignments = lap.lapjvsa_batch(batch_costs, extend_cost=True, return_cost=True, n_threads=os.cpu_count())
print(f"total costs = {costs.sum()}")
print(f"len(assignments) = {len(assignments)}")
print(f"assignments[0].shape = {assignments[0].shape}")
```
</details>
## 🏆 Quick Benchmark
@@ -279,0 +370,0 @@
@@ -10,2 +10,4 @@ LICENSE
lap/lapjvs.py
lap/lapjvs_batch.py
lap/lapjvx_batch.py
lap/lapmod.py
@@ -12,0 +14,0 @@ lapx.egg-info/PKG-INFO
+123 −32
<details><summary>🆕 What's new</summary><br>

- 2025/10/21: `lapx` [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) introduced **`lapjvs()`**.
- 2025/10/16: `lapx` [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) introduced **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**.
- 2025/10/15: Added Python 3.14 support and [more](https://github.com/rathaROG/lapx/pull/15).
- 2025/10/27: [v0.8.0](https://github.com/rathaROG/lapx/releases/tag/v0.8.0) introduced **`lapjvsa()`**, **`lapjvx_batch()`**, **`lapjvxa_batch()`**, **`lapjvs_batch()`** and **`lapjvsa_batch()`**.
- 2025/10/21: [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) introduced **`lapjvs()`**.
- 2025/10/16: [v0.6.0](https://github.com/rathaROG/lapx/releases/tag/v0.6.0) introduced **`lapjvx()`**, **`lapjvxa()`**, and **`lapjvc()`**.
- 2025/10/15: [v0.5.13](https://github.com/rathaROG/lapx/releases/tag/v0.5.13) added Python 3.14 support.
- 2024/12/01: The original [`lap`](https://github.com/gatagat/lap) and [`lapx`](https://github.com/rathaROG/lapx) have been merged.
@@ -19,14 +20,10 @@
`lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap), but has since evolved to offer much more.
[`lapx`](https://github.com/rathaROG/lapx) supports them all: Single ✓ Batch ✓ Square ✓ Rectangular ✓.
`lapx` features the original **`lapjv()`** and **`lapmod()`** functions, and since [**v0.6.0**](https://github.com/rathaROG/lapx/releases/tag/v0.6.0), `lapx` has introduced three additional assignment solvers:
- **`lapjvx()`** and **`lapjvxa()`** — enhanced versions of [`lap.lapjv()`](https://github.com/gatagat/lap) with more flexible output formats
- **`lapjvc()`** — an enhanced version of Christoph Heindl’s [`lapsolver.solve_dense()`](https://github.com/cheind/py-lapsolver) with unified output formats
`lapx` was initially created to maintain Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) — a ***Jonker-Volgenant*** solver, but has since evolved to offer much more; see the [usage section](https://github.com/rathaROG/lapx#-usage) for details on all available solver functions.
`lapx` [**v0.7.0**](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) introduced a new function: **`lapjvs()`** — an enhanced version of Vadim Markovtsev’s [`lapjv()`](https://github.com/src-d/lapjv), supporting both rectangular and square cost matrices with flexible output styles.
<details><summary>Click to read more ...</summary><br>
<details><summary>Read more</summary><br>
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on the ***Jonker-Volgenant*** algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch, based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³.
All [linear assignment problem](https://en.wikipedia.org/wiki/Assignment_problem) solvers in `lapx` are based on the Jonker-Volgenant algorithm for dense LAPJV ¹ or sparse LAPMOD ² matrices. Tomas Kazmar's [`lap`](https://github.com/gatagat/lap) implemented the core **`lapjv()`** and **`lapmod()`** from scratch, based solely on the papers ¹˒² and the public domain Pascal implementation provided by A. Volgenant ³.
<sup>¹ R. Jonker and A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear Assignment Problems", Computing 38, 325-340 (1987) </sup><br>
@@ -54,7 +51,6 @@ <sup>² A. Volgenant, "Linear and Semi-Assignment Problems: A Core Oriented Approach", Computer Ops Res. 23, 917-932 (1996) </sup><br>
| Python 3.8 | AMD64 | x86_64/aarch64 | x86_64/arm64 |
| Python 3.9-3.14 ¹˒² | AMD64/ARM64 ³ | x86_64/aarch64 | x86_64/arm64 |
| Python 3.9-3.14 ¹ | AMD64/ARM64 ² | x86_64/aarch64 | x86_64/arm64 |
<sup>¹ `lapx` v0.5.13+ supports numpy 1.x-2.x and Python 3.14. 🆕 </sup><br>
<sup>² Pre-built wheels for Python 3.13+ do not support free-threading.</sup><br>
<sup>³ Windows ARM64 is experimental.</sup><br>
<sup>¹ ⚠️ Pre-built wheels for Python 3.13+ do not support free-threading. </sup><br>
<sup>² ⚠️ Windows ARM64 is experimental. </sup><br>
@@ -85,4 +81,6 @@
### 1. The original function ``lapjv()``
### 🅰️ Single-matrix Solvers 📄

#### 1. The original function ``lapjv()``
As with `lap`, use `import lap`; for example:
@@ -93,4 +91,4 @@
# x, y = lap.lapjv(np.random.rand(4, 5), extend_cost=True, return_cost=False)
total_cost, x, y = lap.lapjv(np.random.rand(4, 5), extend_cost=True, return_cost=True)
# x, y = lap.lapjv(np.random.rand(100, 150), extend_cost=True, return_cost=False)
total_cost, x, y = lap.lapjv(np.random.rand(100, 150), extend_cost=True, return_cost=True)
assignments = np.array([[y[i], i] for i in x if i >= 0])
@@ -130,3 +128,3 @@ ```
### 2. The new function ``lapjvx()``
#### 2. The new function ``lapjvx()``
@@ -138,4 +136,4 @@ `lapjvx()` basically is `lapjv()`, but it matches the return style of SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html) with no additional overhead. You can see how it compares to others in the Object Tracking benchmark [here](https://github.com/rathaROG/lapx/blob/main/benchmark.md#-object-tracking).
# row_indices, col_indices = lap.lapjvx(np.random.rand(4, 5), extend_cost=True, return_cost=False)
total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(4, 5), extend_cost=True, return_cost=True)
# row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=False)
total_cost, row_indices, col_indices = lap.lapjvx(np.random.rand(100, 150), extend_cost=True, return_cost=True)
assignments = np.array(list(zip(row_indices, col_indices)))
@@ -146,3 +144,3 @@ ```
### 3. The new function ``lapjvxa()``
#### 3. The new function ``lapjvxa()``
@@ -154,4 +152,4 @@ `lapjvxa()` is essentially the same as `lapjvx()`, but it returns assignments with shape `(K, 2)` directly — no additional or manual post-processing required. `lapjvxa()` is optimized for applications that only need the final assignments and do not require control over the `cost_limit` parameter.
# assignments = lap.lapjvxa(np.random.rand(4, 5), extend_cost=True, return_cost=False)
total_cost, assignments = lap.lapjvxa(np.random.rand(4, 5), extend_cost=True, return_cost=True)
# assignments = lap.lapjvxa(np.random.rand(100, 150), extend_cost=True, return_cost=False)
total_cost, assignments = lap.lapjvxa(np.random.rand(100, 150), extend_cost=True, return_cost=True)
```
@@ -163,3 +161,3 @@
### 4. The new function ``lapjvc()``
#### 4. The new function ``lapjvc()``
@@ -171,4 +169,4 @@ `lapjvc()` is an enhanced version of Christoph Heindl's [py-lapsolver](https://github.com/cheind/py-lapsolver). `lapjvc()` is as fast as (if not faster than) other functions when `n=m` (the cost matrix is square), but it is much slower when `n≠m` (the cost matrix is rectangular). This function adopts the return style of `lapjvx()` — the same as SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html).
# row_indices, col_indices = lap.lapjvc(np.random.rand(4, 5), return_cost=False)
total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(4, 5), return_cost=True)
# row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=False)
total_cost, row_indices, col_indices = lap.lapjvc(np.random.rand(100, 150), return_cost=True)
assignments = np.array(list(zip(row_indices, col_indices)))
@@ -181,3 +179,3 @@ ```
### 5. The new function ``lapjvs()``
#### 5. The new function ``lapjvs()``
@@ -189,4 +187,4 @@ `lapjvs()` is an enhanced version of Vadim Markovtsev's [`lapjv`](https://github.com/src-d/lapjv). While `lapjvs()` does not use CPU special instruction sets like the original implementation, it still delivers comparable performance. It natively supports both square and rectangular cost matrices and can produce output either in SciPy's [`linear_sum_assignment`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html) style or `(x, y)` mappings. See the [docstring here](https://github.com/rathaROG/lapx/blob/main/lap/lapjvs.py) for more details.
# row_indices, col_indices = lap.lapjvs(np.random.rand(4, 5), return_cost=False, jvx_like=True)
total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(4, 5), return_cost=True, jvx_like=True)
# row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=False, jvx_like=True)
total_cost, row_indices, col_indices = lap.lapjvs(np.random.rand(100, 150), return_cost=True, jvx_like=True)
assignments = np.array(list(zip(row_indices, col_indices)))
@@ -197,5 +195,20 @@ ```
<details><summary>Show <code>lapjvsa()</code></summary>

#### 6. The new function ``lapjvsa()``
`lapjvsa()` is essentially the same as `lapjvs()`, but it returns assignments with shape `(K, 2)` directly — no additional or manual post-processing required.
```python
import numpy as np, lap
# assignments = lap.lapjvsa(np.random.rand(100, 150), return_cost=False)
total_cost, assignments = lap.lapjvsa(np.random.rand(100, 150), return_cost=True)
```
</details>

<details><summary>Show <code>lapmod()</code></summary>

### 6. The original function ``lapmod()``
#### 7. The original function ``lapmod()``
@@ -207,3 +220,3 @@ See the [docstring](https://github.com/rathaROG/lapx/blob/8d56b42265a23c3b5a290b1039dacaac70dfe60d/lap/lapmod.py#L275) for details. | ||
| n, m = 5000, 5000 | ||
| n, m = 1000, 1000 | ||
| cm = np.random.rand(n, m) | ||
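The snippet above only builds a dense matrix; `lapmod()` itself consumes a sparse layout. A minimal dense-to-sparse sketch, assuming the `(n, cc, ii, kk)` CSR-style input layout described in the linked docstring:

```python
import numpy as np, lap

n, m = 1000, 1000
cm = np.random.rand(n, m)
cc = cm.ravel()                   # all (finite) costs, row-major
ii = np.arange(0, n * m + 1, m)   # ii[r] = start of row r in cc; ii[n] = len(cc)
kk = np.tile(np.arange(m), n)     # column index of each entry in cc
total_cost, x, y = lap.lapmod(n, cc, ii, kk, return_cost=True)
print(total_cost)
```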
@@ -224,2 +237,80 @@ | ||
| ### 🅱️ Batch Solvers 🗂️ | ||
| #### 1. The new function ``lapjvx_batch()`` | ||
| `lapjvx_batch()` is the batch version of [`lapjvx()`](https://github.com/rathaROG/lapx#2-the-new-function-lapjvx), accepting costs with shape `(B, N, M)`. See the **common parameters** and **return values** in [`lap/__init__.py`](https://github.com/rathaROG/lapx/blob/main/lap/__init__.py). | ||
| ```python | ||
| import numpy as np, lap, os | ||
| batch_costs = np.random.rand(500, 100, 150)  # (B, N, M); B is the batch size | ||
| costs, rows, cols = lap.lapjvx_batch(batch_costs, extend_cost=True, return_cost=True, n_threads=os.cpu_count()) | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignments for batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
| ``` | ||
| <details><summary>Show <code>lapjvxa_batch()</code></summary> | ||
| #### 2. The new function ``lapjvxa_batch()`` | ||
| `lapjvxa_batch()` is the batch version of [`lapjvxa()`](https://github.com/rathaROG/lapx#3-the-new-function-lapjvxa), accepting costs with shape `(B, N, M)`. See the **common parameters** and **return values** in [`lap/__init__.py`](https://github.com/rathaROG/lapx/blob/main/lap/__init__.py). | ||
| ```python | ||
| import numpy as np, lap, os | ||
| batch_costs = np.random.rand(500, 100, 150)  # (B, N, M); B is the batch size | ||
| costs, assignments = lap.lapjvxa_batch(batch_costs, extend_cost=True, return_cost=True, n_threads=os.cpu_count()) | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| ``` | ||
| </details> | ||
| <details><summary>Show <code>lapjvs_batch()</code></summary> | ||
| #### 3. The new function ``lapjvs_batch()`` | ||
| `lapjvs_batch()` is the batch version of [`lapjvs()`](https://github.com/rathaROG/lapx#5-the-new-function-lapjvs), accepting costs with shape `(B, N, M)`. See the **common parameters** and **return values** in [`lap/__init__.py`](https://github.com/rathaROG/lapx/blob/main/lap/__init__.py). | ||
| ```python | ||
| import numpy as np, lap, os | ||
| batch_costs = np.random.rand(500, 100, 150)  # (B, N, M); B is the batch size | ||
| costs, rows, cols = lap.lapjvs_batch(batch_costs, extend_cost=True, return_cost=True, n_threads=os.cpu_count()) | ||
| print(f"total costs = {costs.sum()}") | ||
| # Access the assignments for batch b = 7 | ||
| rows_7 = rows[7] # 1D array of row indices | ||
| cols_7 = cols[7] # 1D array of col indices | ||
| assignments_7 = np.column_stack((rows_7, cols_7)) # (K_b, 2) | ||
| print(f"assignments_7.shape = {assignments_7.shape}") | ||
| ``` | ||
| </details> | ||
| <details><summary>Show <code>lapjvsa_batch()</code></summary> | ||
| #### 4. The new function ``lapjvsa_batch()`` | ||
| `lapjvsa_batch()` is the batch version of [`lapjvsa()`](https://github.com/rathaROG/lapx#6-the-new-function-lapjvsa), accepting costs with shape `(B, N, M)`. See the **common parameters** and **return values** in [`lap/__init__.py`](https://github.com/rathaROG/lapx/blob/main/lap/__init__.py). | ||
| ```python | ||
| import numpy as np, lap, os | ||
| batch_costs = np.random.rand(500, 100, 150)  # (B, N, M); B is the batch size | ||
| costs, assignments = lap.lapjvsa_batch(batch_costs, extend_cost=True, return_cost=True, n_threads=os.cpu_count()) | ||
| print(f"total costs = {costs.sum()}") | ||
| print(f"len(assignments) = {len(assignments)}") | ||
| print(f"assignments[0].shape = {assignments[0].shape}") | ||
| ``` | ||
| </details> | ||
| ## 🏆 Quick Benchmark | ||
@@ -226,0 +317,0 @@ |
+5
-3
@@ -6,3 +6,3 @@ # Copyright (c) 2025 Ratha SIV | MIT License | ||
| LICENSE = "MIT" | ||
| DESCRIPTION = "Linear assignment problem solvers" | ||
| DESCRIPTION = "Linear assignment problem solvers, including single and batch solvers." | ||
| LONG_DESCRIPTION = open("README.md", encoding="utf-8").read() | ||
@@ -96,2 +96,3 @@ | ||
| # Merge all extensions | ||
| ext_modules = cythonize([ext_jv, ext_jvx]) + [ext_jvc, ext_jvs] | ||
@@ -110,4 +111,5 @@ | ||
| keywords=['Linear Assignment Problem Solver', 'LAP solver', | ||
| 'Jonker-Volgenant Algorithm', 'LAPJV', 'LAPMOD', | ||
| 'lap', 'lapx', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs'], | ||
| 'Jonker-Volgenant Algorithm', 'LAPJV', 'LAPMOD', 'lap', | ||
| 'lapx', 'lapjvx', 'lapjvxa', 'lapjvc', 'lapjvs', 'lapjvsa', | ||
| 'lapjvx_batch', 'lapjvxa_batch', 'lapjvs_batch', 'lapjvsa_batch'], | ||
| packages=find_packages(include=[PACKAGE_PATH, f"{PACKAGE_PATH}.*"]), | ||
@@ -114,0 +116,0 @@ include_package_data=True, |
@@ -99,8 +99,10 @@ # _lapjvx.pyx | Wrote on 2025/10/16 by rathaROG | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = \ | ||
| np.empty((n,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = \ | ||
| np.empty((n,), dtype=np.int32) | ||
| # Allocate x/y, build cost_ptr as before | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] x_c = np.empty((n,), dtype=np.int32) | ||
| cdef cnp.ndarray[int_t, ndim=1, mode='c'] y_c = np.empty((n,), dtype=np.int32) | ||
| cdef int ret = lapjv_internal(n, cost_ptr, &x_c[0], &y_c[0]) | ||
| cdef int ret | ||
| with nogil: | ||
| ret = lapjv_internal(<uint_t> n, cost_ptr, &x_c[0], &y_c[0]) | ||
| free(cost_ptr) | ||
@@ -107,0 +109,0 @@ if ret != 0: |
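With the kernel now wrapped in `with nogil:`, independent solves can run concurrently from plain Python threads. A minimal sketch (thread count and any speedup are illustrative, not guaranteed):

```python
import numpy as np, lap
from concurrent.futures import ThreadPoolExecutor

# Since the native kernel releases the GIL, a plain thread pool can
# solve independent cost matrices in parallel.
mats = [np.random.rand(500, 500) for _ in range(8)]
with ThreadPoolExecutor(max_workers=4) as ex:
    results = list(ex.map(lambda a: lap.lapjvx(a, return_cost=True), mats))
print(len(results))  # 8 (total_cost, row_indices, col_indices) tuples
```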
+180
-31
@@ -14,8 +14,13 @@ #include <functional> | ||
| "Solves the linear sum assignment problem in float32 (casts inputs if needed). Returns (row_ind, col_ind)."; | ||
| static char lapjvsa_native_docstring[] = | ||
| "Solves the linear sum assignment problem following the input dtype (float32 or float64). Returns pairs (K,2)."; | ||
| static char lapjvsa_float32_docstring[] = | ||
| "Solves the linear sum assignment problem in float32 (casts inputs if needed). Returns pairs (K,2)."; | ||
| static PyObject *py_lapjvs_native(PyObject *self, PyObject *args, PyObject *kwargs); | ||
| static PyObject *py_lapjvs_float32(PyObject *self, PyObject *args, PyObject *kwargs); | ||
| static PyObject *py_lapjvsa_native(PyObject *self, PyObject *args, PyObject *kwargs); | ||
| static PyObject *py_lapjvsa_float32(PyObject *self, PyObject *args, PyObject *kwargs); | ||
| static PyMethodDef module_functions[] = { | ||
| // Keep a friendly alias: lapjvs follows native dtype by default | ||
| {"lapjvs", reinterpret_cast<PyCFunction>(py_lapjvs_native), | ||
@@ -27,2 +32,8 @@ METH_VARARGS | METH_KEYWORDS, lapjvs_native_docstring}, | ||
| METH_VARARGS | METH_KEYWORDS, lapjvs_float32_docstring}, | ||
| {"lapjvsa", reinterpret_cast<PyCFunction>(py_lapjvsa_native), | ||
| METH_VARARGS | METH_KEYWORDS, lapjvsa_native_docstring}, | ||
| {"lapjvsa_native", reinterpret_cast<PyCFunction>(py_lapjvsa_native), | ||
| METH_VARARGS | METH_KEYWORDS, lapjvsa_native_docstring}, | ||
| {"lapjvsa_float32", reinterpret_cast<PyCFunction>(py_lapjvsa_float32), | ||
| METH_VARARGS | METH_KEYWORDS, lapjvsa_float32_docstring}, | ||
| {NULL, NULL, 0, NULL} | ||
@@ -35,10 +46,7 @@ }; | ||
| PyModuleDef_HEAD_INIT, | ||
| "lapjvs", /* m_name */ | ||
| module_docstring, /* m_doc */ | ||
| -1, /* m_size */ | ||
| module_functions, /* m_methods */ | ||
| NULL, /* m_reload */ | ||
| NULL, /* m_traverse */ | ||
| NULL, /* m_clear */ | ||
| NULL, /* m_free */ | ||
| "lapjvs", | ||
| module_docstring, | ||
| -1, | ||
| module_functions, | ||
| NULL, NULL, NULL, NULL, | ||
| }; | ||
@@ -61,4 +69,3 @@ PyObject *m = PyModule_Create(&moduledef); | ||
| public: | ||
| _pyobj() : pyobj_parent<O>( | ||
| nullptr, [](O *p){ if (p) Py_DECREF(p); }) {} | ||
| _pyobj() : pyobj_parent<O>(nullptr, [](O *p){ if (p) Py_DECREF(p); }) {} | ||
| explicit _pyobj(PyObject *ptr) : pyobj_parent<O>( | ||
@@ -90,6 +97,3 @@ reinterpret_cast<O *>(ptr), [](O *p){ if(p) Py_DECREF(p); }) {} | ||
| // Native dtype entry point: lapjvs_native(cost_matrix, verbose=False) | ||
| // - Accepts only float32 or float64 without casting; errors otherwise. | ||
| // - Dispatches to float or double kernel based on the input dtype. | ||
| // - Returns (row_ind, col_ind). | ||
| // Zero-copy: preallocate NumPy outputs, write directly | ||
| static PyObject *py_lapjvs_native(PyObject *self, PyObject *args, PyObject *kwargs) { | ||
@@ -99,11 +103,8 @@ PyObject *cost_matrix_obj; | ||
| static const char *kwlist[] = {"cost_matrix", "verbose", NULL}; | ||
| if (!PyArg_ParseTupleAndKeywords( | ||
| args, kwargs, "O|p", const_cast<char**>(kwlist), | ||
| &cost_matrix_obj, &verbose)) { | ||
| if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|p", const_cast<char**>(kwlist), | ||
| &cost_matrix_obj, &verbose)) { | ||
| return NULL; | ||
| } | ||
| // Ensure array view; do not cast dtype. | ||
| pyarray cost_matrix_array(PyArray_FROM_OTF( | ||
| cost_matrix_obj, NPY_NOTYPE, NPY_ARRAY_IN_ARRAY)); | ||
| pyarray cost_matrix_array(PyArray_FROM_OTF(cost_matrix_obj, NPY_NOTYPE, NPY_ARRAY_IN_ARRAY)); | ||
| if (!cost_matrix_array) { | ||
@@ -118,3 +119,2 @@ PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a numpy array"); | ||
| } | ||
| auto ndims = PyArray_NDIM(cost_matrix_array.get()); | ||
@@ -138,3 +138,4 @@ if (ndims != 2) { | ||
| npy_intp ret_dims[] = {dim, 0}; | ||
| // Zero-copy outputs | ||
| npy_intp ret_dims[] = {dim}; | ||
| pyarray row_ind_array(PyArray_SimpleNew(1, ret_dims, NPY_INT)); | ||
@@ -156,5 +157,3 @@ pyarray col_ind_array(PyArray_SimpleNew(1, ret_dims, NPY_INT)); | ||
| // Float32 entry point: lapjvs_float32(cost_matrix, verbose=False) | ||
| // - Casts to float32 if needed, but avoids a copy when already float32. | ||
| // - Returns (row_ind, col_ind). | ||
| // Zero-copy: write into NumPy outputs directly | ||
| static PyObject *py_lapjvs_float32(PyObject *self, PyObject *args, PyObject *kwargs) { | ||
@@ -164,9 +163,7 @@ PyObject *cost_matrix_obj; | ||
| static const char *kwlist[] = {"cost_matrix", "verbose", NULL}; | ||
| if (!PyArg_ParseTupleAndKeywords( | ||
| args, kwargs, "O|p", const_cast<char**>(kwlist), | ||
| &cost_matrix_obj, &verbose)) { | ||
| if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|p", const_cast<char**>(kwlist), | ||
| &cost_matrix_obj, &verbose)) { | ||
| return NULL; | ||
| } | ||
| // Allow casting to float32, avoid copy if dtype already matches. | ||
| pyarray cost_matrix_array(PyArray_FROM_OTF( | ||
@@ -197,3 +194,4 @@ cost_matrix_obj, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY | NPY_ARRAY_FORCECAST)); | ||
| npy_intp ret_dims[] = {dim, 0}; | ||
| // Zero-copy outputs | ||
| npy_intp ret_dims[] = {dim}; | ||
| pyarray row_ind_array(PyArray_SimpleNew(1, ret_dims, NPY_INT)); | ||
@@ -208,2 +206,153 @@ pyarray col_ind_array(PyArray_SimpleNew(1, ret_dims, NPY_INT)); | ||
| return Py_BuildValue("(OO)", row_ind_array.get(), col_ind_array.get()); | ||
| } | ||
| // Zero-copy for pairs: write mapping into NumPy arrays directly, then build pairs | ||
| static PyObject *py_lapjvsa_native(PyObject *self, PyObject *args, PyObject *kwargs) { | ||
| PyObject *cost_matrix_obj; | ||
| int verbose = 0; | ||
| static const char *kwlist[] = {"cost_matrix", "verbose", NULL}; | ||
| if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|p", const_cast<char**>(kwlist), | ||
| &cost_matrix_obj, &verbose)) { | ||
| return NULL; | ||
| } | ||
| pyarray cost_matrix_array(PyArray_FROM_OTF(cost_matrix_obj, NPY_NOTYPE, NPY_ARRAY_IN_ARRAY)); | ||
| if (!cost_matrix_array) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a numpy array"); | ||
| return NULL; | ||
| } | ||
| int typ = PyArray_TYPE(cost_matrix_array.get()); | ||
| if (typ != NPY_FLOAT32 && typ != NPY_FLOAT64) { | ||
| PyErr_SetString(PyExc_TypeError, "\"cost_matrix\" must be float32 or float64 for lapjvsa()"); | ||
| return NULL; | ||
| } | ||
| auto ndims = PyArray_NDIM(cost_matrix_array.get()); | ||
| if (ndims != 2) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a square 2D numpy array"); | ||
| return NULL; | ||
| } | ||
| auto dims = PyArray_DIMS(cost_matrix_array.get()); | ||
| if (dims[0] != dims[1]) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a square 2D numpy array"); | ||
| return NULL; | ||
| } | ||
| int dim = static_cast<int>(dims[0]); | ||
| if (dim < 0) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\"'s shape is invalid"); | ||
| return NULL; | ||
| } | ||
| auto cost_matrix = PyArray_DATA(cost_matrix_array.get()); | ||
| if (dim == 0) { | ||
| npy_intp pdims[] = {0, 2}; | ||
| pyarray pairs(PyArray_SimpleNew(2, pdims, NPY_INT)); | ||
| return reinterpret_cast<PyObject*>(pairs.release()); | ||
| } | ||
| // Zero-copy mapping outputs | ||
| npy_intp odims[] = {dim}; | ||
| pyarray row_ind_array(PyArray_SimpleNew(1, odims, NPY_INT)); | ||
| pyarray col_ind_array(PyArray_SimpleNew(1, odims, NPY_INT)); | ||
| auto row_ind = reinterpret_cast<int*>(PyArray_DATA(row_ind_array.get())); | ||
| auto col_ind = reinterpret_cast<int*>(PyArray_DATA(col_ind_array.get())); | ||
| if (typ == NPY_FLOAT32) { | ||
| std::unique_ptr<float[]> v(new float[dim]); | ||
| call_lap<float>(dim, cost_matrix, verbose, row_ind, col_ind, v.get()); | ||
| } else { | ||
| std::unique_ptr<double[]> v(new double[dim]); | ||
| call_lap<double>(dim, cost_matrix, verbose, row_ind, col_ind, v.get()); | ||
| } | ||
| // Count K | ||
| npy_intp K = 0; | ||
| for (int i = 0; i < dim; ++i) { | ||
| int j = row_ind[i]; | ||
| if (j >= 0 && j < dim) ++K; | ||
| } | ||
| // Build pairs (K,2) directly | ||
| npy_intp pdims[] = {K, 2}; | ||
| pyarray pairs(PyArray_SimpleNew(2, pdims, NPY_INT)); | ||
| auto* pdata = reinterpret_cast<int*>(PyArray_DATA(pairs.get())); | ||
| npy_intp w = 0; | ||
| for (int i = 0; i < dim; ++i) { | ||
| int j = row_ind[i]; | ||
| if (j >= 0 && j < dim) { | ||
| pdata[w * 2 + 0] = i; | ||
| pdata[w * 2 + 1] = j; | ||
| ++w; | ||
| } | ||
| } | ||
| return reinterpret_cast<PyObject*>(pairs.release()); | ||
| } | ||
| // Zero-copy for pairs (float32 path) | ||
| static PyObject *py_lapjvsa_float32(PyObject *self, PyObject *args, PyObject *kwargs) { | ||
| PyObject *cost_matrix_obj; | ||
| int verbose = 0; | ||
| static const char *kwlist[] = {"cost_matrix", "verbose", NULL}; | ||
| if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|p", const_cast<char**>(kwlist), | ||
| &cost_matrix_obj, &verbose)) { | ||
| return NULL; | ||
| } | ||
| pyarray cost_matrix_array(PyArray_FROM_OTF( | ||
| cost_matrix_obj, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY | NPY_ARRAY_FORCECAST)); | ||
| if (!cost_matrix_array) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be convertible to float32 for lapjvsa_float32()"); | ||
| return NULL; | ||
| } | ||
| auto ndims = PyArray_NDIM(cost_matrix_array.get()); | ||
| if (ndims != 2) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a square 2D numpy array"); | ||
| return NULL; | ||
| } | ||
| auto dims = PyArray_DIMS(cost_matrix_array.get()); | ||
| if (dims[0] != dims[1]) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a square 2D numpy array"); | ||
| return NULL; | ||
| } | ||
| int dim = static_cast<int>(dims[0]); | ||
| if (dim < 0) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\"'s shape is invalid"); | ||
| return NULL; | ||
| } | ||
| auto cost_matrix = PyArray_DATA(cost_matrix_array.get()); | ||
| if (dim == 0) { | ||
| npy_intp pdims[] = {0, 2}; | ||
| pyarray pairs(PyArray_SimpleNew(2, pdims, NPY_INT)); | ||
| return reinterpret_cast<PyObject*>(pairs.release()); | ||
| } | ||
| // Zero-copy mapping outputs | ||
| npy_intp odims[] = {dim}; | ||
| pyarray row_ind_array(PyArray_SimpleNew(1, odims, NPY_INT)); | ||
| pyarray col_ind_array(PyArray_SimpleNew(1, odims, NPY_INT)); | ||
| auto row_ind = reinterpret_cast<int*>(PyArray_DATA(row_ind_array.get())); | ||
| auto col_ind = reinterpret_cast<int*>(PyArray_DATA(col_ind_array.get())); | ||
| std::unique_ptr<float[]> v(new float[dim]); | ||
| call_lap<float>(dim, cost_matrix, verbose, row_ind, col_ind, v.get()); | ||
| // Count/build pairs | ||
| npy_intp K = 0; | ||
| for (int i = 0; i < dim; ++i) { | ||
| int j = row_ind[i]; | ||
| if (j >= 0 && j < dim) ++K; | ||
| } | ||
| npy_intp pdims[] = {K, 2}; | ||
| pyarray pairs(PyArray_SimpleNew(2, pdims, NPY_INT)); | ||
| auto* pdata = reinterpret_cast<int*>(PyArray_DATA(pairs.get())); | ||
| npy_intp w = 0; | ||
| for (int i = 0; i < dim; ++i) { | ||
| int j = row_ind[i]; | ||
| if (j >= 0 && j < dim) { | ||
| pdata[w * 2 + 0] = i; | ||
| pdata[w * 2 + 1] = j; | ||
| ++w; | ||
| } | ||
| } | ||
| return reinterpret_cast<PyObject*>(pairs.release()); | ||
| } |
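For reference, the pair-construction loop shared by both entry points is equivalent to the following NumPy sketch (`pairs_from_mapping` is a hypothetical helper for illustration, not part of the module):

```python
import numpy as np

def pairs_from_mapping(row_ind: np.ndarray) -> np.ndarray:
    """Mirror of the C++ loop above: keep every row i whose assigned
    column j = row_ind[i] is valid (0 <= j < dim), stacked as (K, 2)."""
    dim = row_ind.shape[0]
    valid = (row_ind >= 0) & (row_ind < dim)
    rows = np.nonzero(valid)[0].astype(np.int32)
    return np.column_stack((rows, row_ind[valid].astype(np.int32)))
```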