lapx
Advanced tools
+112
-113
@@ -13,3 +13,3 @@ # š Quick Benchmark | ||
| ``` | ||
| pip install "lapx>=0.6.0" | ||
| pip install -U lapx | ||
| pip install scipy | ||
@@ -620,3 +620,3 @@ git clone https://github.com/rathaROG/lapx.git | ||
| See more benchmark results on all platforms [here on GitHub](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml). | ||
| See newer benchmark results on all platforms [here on GitHub](https://github.com/rathaROG/lapx/actions/workflows/benchmark.yaml). | ||
@@ -630,3 +630,3 @@ ## šµļøāāļø Other Benchmarks | ||
| ``` | ||
| pip install "lapx>=0.6.0" | ||
| pip install -U lapx | ||
| pip install scipy | ||
@@ -642,14 +642,11 @@ git clone https://github.com/rathaROG/lapx.git | ||
| To achieve optimal performance of `lapjvx()` or `lapjv()` in object tracking application, follow the implementation in the current [`benchmark_tracking.py`](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_tracking.py) script. | ||
| š `lapx` [v0.7.0](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) introduced [`lapjvs()`](https://github.com/rathaROG/lapx#5-the-new-function-lapjvs), a highly competitive solver. Notably, `lapjvs()` outperforms other solvers in terms of speed when the input cost matrix is square, especially for sizes 5000 and above. | ||
| (See more results on various platforms and architectures [here](https://github.com/rathaROG/lapx/actions/runs/18620330585)) | ||
| š” To achieve optimal performance of `lapjvx()` or `lapjv()` in object tracking application, follow the implementation in the current [`benchmark_tracking.py`](https://github.com/rathaROG/lapx/blob/main/.github/test/benchmark_tracking.py) script. | ||
| šļø See more results on various platforms and architectures [here](https://github.com/rathaROG/lapx/actions/runs/18668517507). | ||
| <details><summary>Show the results:</summary> | ||
| ``` | ||
| Microsoft Windows [Version 10.0.26200.6899] | ||
| (c) Microsoft Corporation. All rights reserved. | ||
| D:\DEV\temp\lapx\.github\test>python benchmark_tracking.py | ||
| ################################################################# | ||
@@ -659,25 +656,26 @@ # Benchmark with threshold (cost_limit) = 0.05 | ||
| ----------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | ||
| ----------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000153s 5th | 0.000148s ā 4th | 0.000056s ā 1st | 0.000132s ā 3rd | 0.000084s ā 2nd | ||
| 25x20 | 0.000071s 5th | 0.000064s ā 4th | 0.000057s ā 2nd | 0.000057s ā 1st | 0.000061s ā 3rd | ||
| 50x50 | 0.000159s 5th | 0.000106s ā 3rd | 0.000075s ā 1st | 0.000082s ā 2nd | 0.000109s ā 4th | ||
| 100x150 | 0.000190s 3rd | 0.000574s ā 4th | 0.000132s ā 1st | 0.000149s ā 2nd | 0.000747s ā 5th | ||
| 250x250 | 0.001269s 4th | 0.001361s ā 5th | 0.000542s ā 2nd | 0.000519s ā 1st | 0.001181s ā 3rd | ||
| 550x500 | 0.003452s 1st | 0.028483s ā 5th | 0.006140s ā 3rd | 0.005663s ā 2nd | 0.021576s ā 4th | ||
| 1000x1000 | 0.024557s 4th | 0.023403s ā 3rd | 0.008724s ā 1st | 0.013036s ā 2nd | 0.026147s ā 5th | ||
| 2000x2500 | 0.037717s 3rd | 1.823954s ā 5th | 0.016659s ā 2nd | 0.016489s ā 1st | 1.580175s ā 4th | ||
| 5000x5000 | 1.047033s 3rd | 1.628817s ā 5th | 0.736356s ā 1st | 0.766828s ā 2nd | 1.349702s ā 4th | ||
| ----------------------------------------------------------------------------------------------------- | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000270s 6th | 0.000086s ā 1st | 0.000117s ā 3rd | 0.000093s ā 2nd | 0.000145s ā 4th | 0.000156s ā 5th | ||
| 25x20 | 0.000134s 6th | 0.000096s ā 1st | 0.000104s ā 4th | 0.000098s ā 2nd | 0.000109s ā 5th | 0.000103s ā 3rd | ||
| 50x50 | 0.000216s 6th | 0.000161s ā 4th | 0.000130s ā 2nd | 0.000135s ā 3rd | 0.000163s ā 5th | 0.000128s ā 1st | ||
| 100x150 | 0.000314s 4th | 0.001181s ā 6th | 0.000307s ā 3rd | 0.000304s ā 2nd | 0.001002s ā 5th | 0.000292s ā 1st | ||
| 250x250 | 0.001926s 4th | 0.002400s ā 6th | 0.001819s ā 3rd | 0.001703s ā 2nd | 0.002221s ā 5th | 0.001585s ā 1st | ||
| 550x500 | 0.005211s 1st | 0.046236s ā 6th | 0.010141s ā 4th | 0.009736s ā 3rd | 0.031337s ā 5th | 0.009591s ā 2nd | ||
| 1000x1000 | 0.035298s 4th | 0.062979s ā 6th | 0.030774s ā 3rd | 0.029720s ā 2nd | 0.037911s ā 5th | 0.014011s ā 1st | ||
| 2000x2500 | 0.047353s 4th | 2.537366s ā 6th | 0.017684s ā 1st | 0.019768s ā 2nd | 2.133186s ā 5th | 0.023504s ā 3rd | ||
| 5000x5000 | 1.923870s 5th | 3.216478s ā 6th | 1.527883s ā 3rd | 1.501829s ā 2nd | 1.720995s ā 4th | 0.879582s ā 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Note: LAPJV-IFT uses in-function filtering lap.lapjv(cost_limit=thresh). | ||
| š ------------------------ OVERALL RANKING ------------------------ š | ||
| 1. LAPX LAPJV : 768.7409 ms | ā | š„x5 š„x3 š„x1 | ||
| 2. LAPX LAPJVX : 802.9538 ms | ā | š„x3 š„x5 š„x1 | ||
| 3. BASELINE SciPy : 1114.6007 ms | ā | š„x1 š„x3 š©x2 š³ļøx3 | ||
| 4. LAPX LAPJVC : 2979.7809 ms | ā | š„x1 š„x2 š©x4 š³ļøx2 | ||
| 5. LAPX LAPJV-IFT : 3506.9110 ms | ā ļø | š„x2 š©x3 š³ļøx4 | ||
| š ------------------------------------------------------------------- š | ||
| š --------------------------- OVERALL RANKING --------------------------- š | ||
| 1. LAPX LAPJVS : 928.9522 ms | ā | š„x5 š„x1 š„x2 š³ļøx1 | ||
| 2. LAPX LAPJVX : 1563.3861 ms | ā | š„x7 š„x2 | ||
| 3. LAPX LAPJV : 1588.9597 ms | ā | š„x1 š„x1 š„x5 š©x2 | ||
| 4. BASELINE SciPy : 2014.5920 ms | ā | š„x1 š©x4 š³ļøx1 š„“x3 | ||
| 5. LAPX LAPJVC : 3927.0696 ms | ā | š©x2 š³ļøx7 | ||
| 6. LAPX LAPJV-IFT : 5866.9837 ms | ā ļø | š„x2 š©x1 š„“x6 | ||
| š ------------------------------------------------------------------------- š | ||
@@ -689,25 +687,26 @@ | ||
| ----------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | ||
| ----------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000116s 5th | 0.000042s ā 1st | 0.000048s ā 3rd | 0.000045s ā 2nd | 0.000060s ā 4th | ||
| 25x20 | 0.000052s 1st | 0.000056s ā 3rd | 0.000056s ā 4th | 0.000054s ā 2nd | 0.000062s ā 5th | ||
| 50x50 | 0.000105s 5th | 0.000104s ā 4th | 0.000070s ā 1st | 0.000072s ā 2nd | 0.000091s ā 3rd | ||
| 100x150 | 0.000169s 3rd | 0.000882s ā 5th | 0.000168s ā 1st | 0.000168s ā 2nd | 0.000690s ā 4th | ||
| 250x250 | 0.001306s 1st | 0.007618s ā 5th | 0.002725s ā 3rd | 0.002842s ā 4th | 0.001719s ā 2nd | ||
| 550x500 | 0.003593s 1st | 0.054599s ā 5th | 0.006124s ā 2nd | 0.006191s ā 3rd | 0.023443s ā 4th | ||
| 1000x1000 | 0.026108s 3rd | 0.029221s ā 4th | 0.010913s ā 1st | 0.011607s ā 2nd | 0.031362s ā 5th | ||
| 2000x2500 | 0.041879s 3rd | 1.971637s ā 5th | 0.016502s ā 1st | 0.017959s ā 2nd | 1.622495s ā 4th | ||
| 5000x5000 | 1.197406s 3rd | 1.463887s ā 5th | 0.642493s ā 2nd | 0.638527s ā 1st | 1.317815s ā 4th | ||
| ----------------------------------------------------------------------------------------------------- | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000181s 6th | 0.000080s ā 1st | 0.000091s ā 4th | 0.000083s ā 2nd | 0.000101s ā 5th | 0.000084s ā 3rd | ||
| 25x20 | 0.000122s 6th | 0.000092s ā 1st | 0.000100s ā 2nd | 0.000100s ā 3rd | 0.000107s ā 5th | 0.000103s ā 4th | ||
| 50x50 | 0.000218s 6th | 0.000149s ā 4th | 0.000133s ā 1st | 0.000140s ā 2nd | 0.000183s ā 5th | 0.000141s ā 3rd | ||
| 100x150 | 0.000350s 4th | 0.001086s ā 5th | 0.000258s ā 1st | 0.000279s ā 3rd | 0.001142s ā 6th | 0.000273s ā 2nd | ||
| 250x250 | 0.001713s 5th | 0.001953s ā 6th | 0.000978s ā 2nd | 0.000998s ā 3rd | 0.001682s ā 4th | 0.000929s ā 1st | ||
| 550x500 | 0.005035s 1st | 0.113739s ā 6th | 0.010219s ā 4th | 0.010151s ā 3rd | 0.029781s ā 5th | 0.010025s ā 2nd | ||
| 1000x1000 | 0.032870s 3rd | 0.076641s ā 6th | 0.037077s ā 5th | 0.035340s ā 4th | 0.031529s ā 1st | 0.031647s ā 2nd | ||
| 2000x2500 | 0.050076s 4th | 2.552992s ā 6th | 0.017056s ā 1st | 0.020267s ā 2nd | 2.110527s ā 5th | 0.022934s ā 3rd | ||
| 5000x5000 | 2.035414s 5th | 3.376261s ā 6th | 1.640862s ā 4th | 1.622361s ā 3rd | 1.534738s ā 2nd | 0.910615s ā 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Note: LAPJV-IFT uses in-function filtering lap.lapjv(cost_limit=thresh). | ||
| š ------------------------ OVERALL RANKING ------------------------ š | ||
| 1. LAPX LAPJVX : 677.4637 ms | ā | š„x1 š„x6 š„x1 š©x1 | ||
| 2. LAPX LAPJV : 679.1001 ms | ā | š„x4 š„x2 š„x2 š©x1 | ||
| 3. BASELINE SciPy : 1270.7361 ms | ā | š„x3 š„x4 š³ļøx2 | ||
| 4. LAPX LAPJVC : 2997.7366 ms | ā | š„x1 š„x1 š©x5 š³ļøx2 | ||
| 5. LAPX LAPJV-IFT : 3528.0464 ms | ā ļø | š„x1 š„x1 š©x2 š³ļøx5 | ||
| š ------------------------------------------------------------------- š | ||
| š --------------------------- OVERALL RANKING --------------------------- š | ||
| 1. LAPX LAPJVS : 976.7508 ms | ā | š„x2 š„x3 š„x3 š©x1 | ||
| 2. LAPX LAPJVX : 1689.7199 ms | ā | š„x3 š„x5 š©x1 | ||
| 3. LAPX LAPJV : 1706.7731 ms | ā | š„x3 š„x2 š©x3 š³ļøx1 | ||
| 4. BASELINE SciPy : 2125.9788 ms | ā | š„x1 š„x1 š©x2 š³ļøx2 š„“x3 | ||
| 5. LAPX LAPJVC : 3709.7903 ms | ā | š„x1 š„x1 š©x1 š³ļøx5 š„“x1 | ||
| 6. LAPX LAPJV-IFT : 6122.9942 ms | ā ļø | š„x2 š©x1 š³ļøx1 š„“x5 | ||
| š ------------------------------------------------------------------------- š | ||
@@ -719,25 +718,26 @@ | ||
| ----------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | ||
| ----------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000118s 5th | 0.000049s ā 3rd | 0.000047s ā 2nd | 0.000045s ā 1st | 0.000058s ā 4th | ||
| 25x20 | 0.000054s 2nd | 0.000064s ā 5th | 0.000058s ā 3rd | 0.000054s ā 1st | 0.000061s ā 4th | ||
| 50x50 | 0.000092s 3rd | 0.000101s ā 4th | 0.000081s ā 2nd | 0.000078s ā 1st | 0.000102s ā 5th | ||
| 100x150 | 0.000195s 3rd | 0.000710s ā 5th | 0.000157s ā 2nd | 0.000147s ā 1st | 0.000647s ā 4th | ||
| 250x250 | 0.001387s 4th | 0.001640s ā 5th | 0.000840s ā 2nd | 0.000802s ā 1st | 0.001287s ā 3rd | ||
| 550x500 | 0.004195s 1st | 0.095603s ā 5th | 0.006292s ā 3rd | 0.006094s ā 2nd | 0.022542s ā 4th | ||
| 1000x1000 | 0.024699s 3rd | 0.037791s ā 5th | 0.017332s ā 1st | 0.017360s ā 2nd | 0.030512s ā 4th | ||
| 2000x2500 | 0.038131s 3rd | 1.946517s ā 5th | 0.016853s ā 2nd | 0.016679s ā 1st | 1.694409s ā 4th | ||
| 5000x5000 | 1.132641s 3rd | 1.679415s ā 5th | 0.771842s ā 2nd | 0.724689s ā 1st | 1.162723s ā 4th | ||
| ----------------------------------------------------------------------------------------------------- | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000167s 6th | 0.000119s ā 5th | 0.000092s ā 2nd | 0.000093s ā 3rd | 0.000105s ā 4th | 0.000085s ā 1st | ||
| 25x20 | 0.000105s 6th | 0.000097s ā 3rd | 0.000096s ā 2nd | 0.000096s ā 1st | 0.000102s ā 5th | 0.000099s ā 4th | ||
| 50x50 | 0.000192s 5th | 0.000158s ā 3rd | 0.000142s ā 1st | 0.000150s ā 2nd | 0.000171s ā 4th | 0.000193s ā 6th | ||
| 100x150 | 0.000319s 4th | 0.001089s ā 6th | 0.000302s ā 3rd | 0.000268s ā 1st | 0.001078s ā 5th | 0.000271s ā 2nd | ||
| 250x250 | 0.001877s 6th | 0.001662s ā 4th | 0.000832s ā 2nd | 0.000866s ā 3rd | 0.001686s ā 5th | 0.000810s ā 1st | ||
| 550x500 | 0.004962s 1st | 0.173261s ā 6th | 0.010107s ā 4th | 0.010075s ā 3rd | 0.021147s ā 5th | 0.009892s ā 2nd | ||
| 1000x1000 | 0.034665s 5th | 0.050879s ā 6th | 0.024332s ā 3rd | 0.023485s ā 2nd | 0.030950s ā 4th | 0.021152s ā 1st | ||
| 2000x2500 | 0.050928s 4th | 2.503577s ā 6th | 0.017477s ā 1st | 0.019962s ā 2nd | 2.087273s ā 5th | 0.027349s ā 3rd | ||
| 5000x5000 | 2.111693s 5th | 3.396578s ā 6th | 1.693776s ā 4th | 1.685035s ā 3rd | 1.567221s ā 2nd | 1.058214s ā 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Note: LAPJV-IFT uses in-function filtering lap.lapjv(cost_limit=thresh). | ||
| š ------------------------ OVERALL RANKING ------------------------ š | ||
| 1. LAPX LAPJVX : 765.9472 ms | ā | š„x7 š„x2 | ||
| 2. LAPX LAPJV : 813.5027 ms | ā | š„x1 š„x6 š„x2 | ||
| 3. BASELINE SciPy : 1201.5123 ms | ā | š„x1 š„x1 š„x5 š©x1 š³ļøx1 | ||
| 4. LAPX LAPJVC : 2912.3413 ms | ā | š„x1 š©x7 š³ļøx1 | ||
| 5. LAPX LAPJV-IFT : 3761.8895 ms | ā | š„x1 š©x1 š³ļøx7 | ||
| š ------------------------------------------------------------------- š | ||
| š --------------------------- OVERALL RANKING --------------------------- š | ||
| 1. LAPX LAPJVS : 1118.0646 ms | ā | š„x4 š„x2 š„x1 š©x1 š„“x1 | ||
| 2. LAPX LAPJVX : 1740.0298 ms | ā | š„x2 š„x3 š„x4 | ||
| 3. LAPX LAPJV : 1747.1555 ms | ā | š„x2 š„x3 š„x2 š©x2 | ||
| 4. BASELINE SciPy : 2204.9078 ms | ā | š„x1 š©x2 š³ļøx3 š„“x3 | ||
| 5. LAPX LAPJVC : 3709.7338 ms | ā | š„x1 š©x3 š³ļøx5 | ||
| 6. LAPX LAPJV-IFT : 6127.4199 ms | ā | š„x2 š©x1 š³ļøx1 š„“x5 | ||
| š ------------------------------------------------------------------------- š | ||
@@ -749,25 +749,26 @@ | ||
| ----------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | ||
| ----------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000121s 5th | 0.000046s ā 1st | 0.000051s ā 3rd | 0.000049s ā 2nd | 0.000060s ā 4th | ||
| 25x20 | 0.000055s 1st | 0.000073s ā 5th | 0.000058s ā 3rd | 0.000058s ā 2nd | 0.000072s ā 4th | ||
| 50x50 | 0.000104s 4th | 0.000097s ā 3rd | 0.000076s ā 1st | 0.000088s ā 2nd | 0.000109s ā 5th | ||
| 100x150 | 0.000190s 3rd | 0.000723s ā 5th | 0.000174s ā 2nd | 0.000153s ā 1st | 0.000708s ā 4th | ||
| 250x250 | 0.001418s 4th | 0.001791s ā 5th | 0.000917s ā 2nd | 0.000879s ā 1st | 0.001381s ā 3rd | ||
| 550x500 | 0.004009s 1st | 0.094516s ā 5th | 0.006915s ā 2nd | 0.007350s ā 3rd | 0.025237s ā 4th | ||
| 1000x1000 | 0.022408s 2nd | 0.046482s ā 5th | 0.022091s ā 1st | 0.023886s ā 3rd | 0.030067s ā 4th | ||
| 2000x2500 | 0.038188s 3rd | 1.932233s ā 5th | 0.017298s ā 1st | 0.019071s ā 2nd | 1.627810s ā 4th | ||
| 5000x5000 | 1.198616s 3rd | 1.933270s ā 5th | 0.972903s ā 2nd | 0.925173s ā 1st | 1.355138s ā 4th | ||
| ----------------------------------------------------------------------------------------------------- | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000168s 6th | 0.000087s ā 1st | 0.000090s ā 2nd | 0.000118s ā 5th | 0.000104s ā 4th | 0.000092s ā 3rd | ||
| 25x20 | 0.000102s 4th | 0.000113s ā 6th | 0.000099s ā 1st | 0.000099s ā 2nd | 0.000107s ā 5th | 0.000099s ā 3rd | ||
| 50x50 | 0.000181s 4th | 0.000226s ā 6th | 0.000154s ā 1st | 0.000162s ā 2nd | 0.000164s ā 3rd | 0.000210s ā 5th | ||
| 100x150 | 0.000321s 4th | 0.001070s ā 5th | 0.000267s ā 2nd | 0.000265s ā 1st | 0.001108s ā 6th | 0.000267s ā 3rd | ||
| 250x250 | 0.001731s 4th | 0.003008s ā 6th | 0.001673s ā 3rd | 0.001625s ā 2nd | 0.001995s ā 5th | 0.001460s ā 1st | ||
| 550x500 | 0.004940s 1st | 0.168662s ā 6th | 0.009288s ā 4th | 0.009245s ā 3rd | 0.030654s ā 5th | 0.009174s ā 2nd | ||
| 1000x1000 | 0.034701s 5th | 0.051617s ā 6th | 0.024396s ā 3rd | 0.023235s ā 2nd | 0.033910s ā 4th | 0.021512s ā 1st | ||
| 2000x2500 | 0.050450s 4th | 2.519313s ā 6th | 0.017596s ā 1st | 0.018210s ā 2nd | 2.104154s ā 5th | 0.027215s ā 3rd | ||
| 5000x5000 | 2.027199s 5th | 3.501020s ā 6th | 1.753403s ā 4th | 1.732642s ā 3rd | 1.517909s ā 2nd | 0.815372s ā 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Note: LAPJV-IFT uses in-function filtering lap.lapjv(cost_limit=thresh). | ||
| š ------------------------ OVERALL RANKING ------------------------ š | ||
| 1. LAPX LAPJVX : 976.7065 ms | ā | š„x3 š„x4 š„x2 | ||
| 2. LAPX LAPJV : 1020.4816 ms | ā | š„x3 š„x4 š„x2 | ||
| 3. BASELINE SciPy : 1265.1097 ms | ā | š„x2 š„x1 š„x3 š©x2 š³ļøx1 | ||
| 4. LAPX LAPJVC : 3040.5820 ms | ā | š„x1 š©x7 š³ļøx1 | ||
| 5. LAPX LAPJV-IFT : 4009.2317 ms | ā | š„x1 š„x1 š³ļøx7 | ||
| š ------------------------------------------------------------------- š | ||
| š --------------------------- OVERALL RANKING --------------------------- š | ||
| 1. LAPX LAPJVS : 875.4009 ms | ā | š„x3 š„x1 š„x4 š³ļøx1 | ||
| 2. LAPX LAPJVX : 1785.6020 ms | ā | š„x1 š„x5 š„x2 š³ļøx1 | ||
| 3. LAPX LAPJV : 1806.9635 ms | ā | š„x3 š„x2 š„x2 š©x2 | ||
| 4. BASELINE SciPy : 2119.7929 ms | ā | š„x1 š©x5 š³ļøx2 š„“x1 | ||
| 5. LAPX LAPJVC : 3690.1060 ms | ā | š„x1 š„x1 š©x2 š³ļøx4 š„“x1 | ||
| 6. LAPX LAPJV-IFT : 6245.1169 ms | ā | š„x1 š³ļøx1 š„“x7 | ||
| š ------------------------------------------------------------------------- š | ||
@@ -779,30 +780,28 @@ | ||
| ----------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | ||
| ----------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000121s 5th | 0.000048s ā 1st | 0.000050s ā 3rd | 0.000049s ā 2nd | 0.000062s ā 4th | ||
| 25x20 | 0.000058s 1st | 0.000086s ā 5th | 0.000063s ā 3rd | 0.000060s ā 2nd | 0.000072s ā 4th | ||
| 50x50 | 0.000102s 3rd | 0.000120s ā 5th | 0.000085s ā 1st | 0.000088s ā 2nd | 0.000111s ā 4th | ||
| 100x150 | 0.000187s 3rd | 0.000713s ā 4th | 0.000183s ā 2nd | 0.000154s ā 1st | 0.000719s ā 5th | ||
| 250x250 | 0.001286s 4th | 0.001058s ā 3rd | 0.000481s ā 2nd | 0.000435s ā 1st | 0.001345s ā 5th | ||
| 550x500 | 0.004404s 1st | 0.098839s ā 5th | 0.007206s ā 3rd | 0.006994s ā 2nd | 0.022169s ā 4th | ||
| 1000x1000 | 0.025491s 3rd | 0.028937s ā 4th | 0.013111s ā 1st | 0.013985s ā 2nd | 0.030395s ā 5th | ||
| 2000x2500 | 0.039780s 3rd | 1.999674s ā 5th | 0.018199s ā 1st | 0.020531s ā 2nd | 1.556668s ā 4th | ||
| 5000x5000 | 1.142951s 4th | 1.586818s ā 5th | 0.720062s ā 1st | 0.723589s ā 2nd | 1.141216s ā 3rd | ||
| ----------------------------------------------------------------------------------------------------- | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Size | BASELINE SciPy | LAPX LAPJV-IFT | LAPX LAPJV | LAPX LAPJVX | LAPX LAPJVC | LAPX LAPJVS | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| 10x10 | 0.000170s 6th | 0.000079s ā 1st | 0.000092s ā 4th | 0.000085s ā 3rd | 0.000108s ā 5th | 0.000084s ā 2nd | ||
| 25x20 | 0.000120s 5th | 0.000144s ā 6th | 0.000101s ā 1st | 0.000102s ā 3rd | 0.000116s ā 4th | 0.000101s ā 2nd | ||
| 50x50 | 0.000185s 6th | 0.000139s ā 4th | 0.000127s ā 1st | 0.000135s ā 3rd | 0.000158s ā 5th | 0.000135s ā 2nd | ||
| 100x150 | 0.000337s 4th | 0.001089s ā 6th | 0.000264s ā 1st | 0.000296s ā 3rd | 0.001083s ā 5th | 0.000276s ā 2nd | ||
| 250x250 | 0.001832s 6th | 0.001699s ā 5th | 0.000847s ā 2nd | 0.000866s ā 3rd | 0.001471s ā 4th | 0.000813s ā 1st | ||
| 550x500 | 0.005429s 1st | 0.175252s ā 6th | 0.010315s ā 4th | 0.010249s ā 2nd | 0.032756s ā 5th | 0.010292s ā 3rd | ||
| 1000x1000 | 0.040797s 5th | 0.052160s ā 6th | 0.025452s ā 3rd | 0.024602s ā 2nd | 0.036510s ā 4th | 0.021898s ā 1st | ||
| 2000x2500 | 0.048694s 4th | 2.440901s ā 6th | 0.016812s ā 1st | 0.018195s ā 2nd | 2.064631s ā 5th | 0.028164s ā 3rd | ||
| 5000x5000 | 2.152508s 5th | 3.529325s ā 6th | 1.664839s ā 4th | 1.645120s ā 3rd | 1.626812s ā 2nd | 0.897383s ā 1st | ||
| ----------------------------------------------------------------------------------------------------------------------- | ||
| Note: LAPJV-IFT uses in-function filtering lap.lapjv(cost_limit=thresh). | ||
| š ------------------------ OVERALL RANKING ------------------------ š | ||
| 1. LAPX LAPJV : 759.4403 ms | ā | š„x4 š„x2 š„x3 | ||
| 2. LAPX LAPJVX : 765.8850 ms | ā | š„x2 š„x7 | ||
| 3. BASELINE SciPy : 1214.3801 ms | ā | š„x2 š„x4 š©x2 š³ļøx1 | ||
| 4. LAPX LAPJVC : 2752.7570 ms | ā | š„x1 š©x5 š³ļøx3 | ||
| 5. LAPX LAPJV-IFT : 3716.2938 ms | ā | š„x1 š„x1 š©x2 š³ļøx5 | ||
| š ------------------------------------------------------------------- š | ||
| D:\DEV\temp\lapx\.github\test> | ||
| š --------------------------- OVERALL RANKING --------------------------- š | ||
| 1. LAPX LAPJVS : 959.1463 ms | ā | š„x3 š„x4 š„x2 | ||
| 2. LAPX LAPJVX : 1699.6506 ms | ā | š„x3 š„x6 | ||
| 3. LAPX LAPJV : 1718.8494 ms | ā | š„x4 š„x1 š„x1 š©x3 | ||
| 4. BASELINE SciPy : 2250.0724 ms | ā | š„x1 š©x2 š³ļøx3 š„“x3 | ||
| 5. LAPX LAPJVC : 3763.6443 ms | ā | š„x1 š©x3 š³ļøx5 | ||
| 6. LAPX LAPJV-IFT : 6200.7878 ms | ā | š„x1 š©x1 š³ļøx1 š„“x6 | ||
| š ------------------------------------------------------------------------- š | ||
| ``` | ||
| </details> |
+1
-1
@@ -34,3 +34,3 @@ # Copyright (c) 2025 Ratha SIV | MIT License | ||
| __version__ = '0.7.0' | ||
| __version__ = '0.7.1' | ||
@@ -37,0 +37,0 @@ from .lapmod import lapmod |
+40
-56
@@ -6,3 +6,4 @@ # Copyright (c) 2025 Ratha SIV | MIT License | ||
| from ._lapjvs import lapjvs as _lapjvs_raw | ||
| from ._lapjvs import lapjvs_native as _lapjvs_native | ||
| from ._lapjvs import lapjvs_float32 as _lapjvs_float32 | ||
@@ -15,3 +16,3 @@ | ||
| jvx_like: bool = True, | ||
| prefer_float32: bool = False, | ||
| prefer_float32: bool = True, | ||
| ): | ||
@@ -43,20 +44,10 @@ """ | ||
| Returns | ||
| ------- | ||
| One of: | ||
| - (cost, x, y) if return_cost and not jvx_like | ||
| - (x, y) if not return_cost and not jvx_like | ||
| - (cost, row_indices, col_indices) if return_cost and jvx_like | ||
| - (row_indices, col_indices) if not return_cost and jvx_like | ||
| Where: | ||
| - x: np.ndarray shape (n,), dtype=int. x[r] is assigned column for row r, or -1. | ||
| - y: np.ndarray shape (m,), dtype=int. y[c] is assigned row for column c, or -1. | ||
| - row_indices, col_indices: 1D int arrays of equal length K, listing matched (row, col) pairs. | ||
| Notes | ||
| ----- | ||
| - For square inputs without extension, this wraps the raw C function directly and adapts outputs. | ||
| - The solver kernel may run in float32 (when prefer_float32=True) or native float64, | ||
| but the returned total cost is always recomputed from the ORIGINAL input array | ||
| to preserve previous numeric behavior and parity with lapjv/lapjvx. | ||
| - For rectangular inputs, zero-padding exactly models the rectangular LAP. | ||
| """ | ||
| # Keep the original array to compute the final cost from it (preserves previous behavior) | ||
| a = np.asarray(cost) | ||
@@ -66,8 +57,16 @@ if a.ndim != 2: | ||
| if prefer_float32 and a.dtype != np.float32: | ||
| a = a.astype(np.float32, copy=False) | ||
| n, m = a.shape | ||
| extend = (n != m) if (extend_cost is None) else bool(extend_cost) | ||
| # Choose backend and working dtype for the solver only (ensure contiguity to avoid hidden copies) | ||
| use_float32_kernel = not ((prefer_float32 is False) and (a.dtype == np.float64)) | ||
| if use_float32_kernel: | ||
| # Run float32 kernel (casting as needed) | ||
| _kernel = _lapjvs_float32 | ||
| work = np.ascontiguousarray(a, dtype=np.float32) | ||
| else: | ||
| # Run native kernel on float64 inputs | ||
| _kernel = _lapjvs_native | ||
| work = np.ascontiguousarray(a, dtype=np.float64) | ||
| def _rows_cols_from_x(x_vec: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: | ||
@@ -82,6 +81,8 @@ if x_vec.size == 0: | ||
| if not extend: | ||
| total_raw, x_raw, y_raw = _lapjvs_raw(a) | ||
| x_raw = np.asarray(x_raw, dtype=np.int64) | ||
| y_raw = np.asarray(y_raw, dtype=np.int64) | ||
| # Square: call solver directly on chosen dtype, but compute total from ORIGINAL array | ||
| x_raw_obj, y_raw_obj = _kernel(work) | ||
| # Convert only what's needed; y conversion deferred if not used | ||
| x_raw = np.asarray(x_raw_obj, dtype=np.int64) | ||
| if jvx_like: | ||
@@ -95,2 +96,3 @@ rows, cols = _rows_cols_from_x(x_raw) | ||
| else: | ||
| y_raw = np.asarray(y_raw_obj, dtype=np.int64) | ||
| if return_cost: | ||
@@ -102,6 +104,6 @@ total = float(a[np.arange(n), x_raw].sum()) if n > 0 else 0.0 | ||
| # Rectangular: zero-pad to square, solve, then trim back | ||
| # Rectangular: zero-pad to square, solve, then trim back; compute total from ORIGINAL array | ||
| size = max(n, m) | ||
| padded = np.empty((size, size), dtype=a.dtype) | ||
| padded[:n, :m] = a | ||
| padded = np.empty((size, size), dtype=work.dtype) | ||
| padded[:n, :m] = work | ||
| if m < size: | ||
@@ -112,6 +114,4 @@ padded[:n, m:] = 0 | ||
| total_pad, x_pad, y_pad = _lapjvs_raw(padded) | ||
| x_pad = np.asarray(x_pad, dtype=np.int64) | ||
| y_pad = np.asarray(y_pad, dtype=np.int64) | ||
| x_pad_obj, y_pad_obj = _kernel(padded) | ||
| x_pad = np.asarray(x_pad_obj, dtype=np.int64) | ||
| cols_pad_n = x_pad[:n] | ||
@@ -132,31 +132,15 @@ | ||
| # lapjv-like outputs | ||
| tiny_threshold = 32 | ||
| if max(n, m) <= tiny_threshold: | ||
| x_out = np.full(n, -1, dtype=np.int64) | ||
| for r in range(n): | ||
| c = int(cols_pad_n[r]) | ||
| if 0 <= c < m: | ||
| x_out[r] = c | ||
| # lapjv-like outputs (vectorized) | ||
| x_out = np.full(n, -1, dtype=np.int64) | ||
| mask_r = (cols_pad_n >= 0) & (cols_pad_n < m) | ||
| if mask_r.any(): | ||
| x_out[mask_r] = cols_pad_n[mask_r] | ||
| y_out = np.full(m, -1, dtype=np.int64) | ||
| rows_pad_m = y_pad[:m] | ||
| for c in range(m): | ||
| r = int(rows_pad_m[c]) | ||
| if 0 <= r < n: | ||
| y_out[c] = r | ||
| else: | ||
| x_out = np.full(n, -1, dtype=np.int64) | ||
| mask_r = (cols_pad_n >= 0) & (cols_pad_n < m) | ||
| if mask_r.any(): | ||
| r_idx = np.nonzero(mask_r)[0] | ||
| x_out[r_idx] = cols_pad_n[mask_r] | ||
| y_pad = np.asarray(y_pad_obj, dtype=np.int64) | ||
| rows_pad_m = y_pad[:m] | ||
| y_out = np.full(m, -1, dtype=np.int64) | ||
| mask_c = (rows_pad_m >= 0) & (rows_pad_m < n) | ||
| if mask_c.any(): | ||
| y_out[mask_c] = rows_pad_m[mask_c] | ||
| y_out = np.full(m, -1, dtype=np.int64) | ||
| rows_pad_m = y_pad[:m] | ||
| mask_c = (rows_pad_m >= 0) & (rows_pad_m < n) | ||
| if mask_c.any(): | ||
| c_idx = np.nonzero(mask_c)[0] | ||
| y_out[c_idx] = rows_pad_m[mask_c] | ||
| if return_cost and n > 0 and m > 0: | ||
@@ -163,0 +147,0 @@ mask = (x_out >= 0) |
| Metadata-Version: 2.4 | ||
| Name: lapx | ||
| Version: 0.7.0 | ||
| Version: 0.7.1 | ||
| Summary: Linear assignment problem solvers | ||
@@ -72,7 +72,7 @@ Home-page: https://github.com/rathaROG/lapx | ||
| `lapx` features the original **`lapjv()`** and **`lapmod()`** functions, and since **v0.6.0**, `lapx` has introduced three additional assignment solvers: | ||
| `lapx` features the original **`lapjv()`** and **`lapmod()`** functions, and since [**v0.6.0**](https://github.com/rathaROG/lapx/releases/tag/v0.6.0), `lapx` has introduced three additional assignment solvers: | ||
| - **`lapjvx()`** and **`lapjvxa()`** ā enhanced versions of [`lap.lapjv()`](https://github.com/gatagat/lap) with more flexible output formats | ||
| - **`lapjvc()`** ā an enhanced version of Christoph Heindlās [`lapsolver.solve_dense()`](https://github.com/cheind/py-lapsolver) with unified output formats | ||
| `lapx` **v0.7.0** has introduced a new function: **`lapjvs()`** ā an enhanced version of Vadim Markovtsevās [`lapjv`](https://github.com/src-d/lapjv), supporting both rectangular and square cost matrices, with flexible output styles. | ||
| `lapx` [**v0.7.0**](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) has introduced a new function: **`lapjvs()`** ā an enhanced version of Vadim Markovtsevās [`lapjv()`](https://github.com/src-d/lapjv), supporting both rectangular and square cost matrices, with flexible output styles. | ||
@@ -79,0 +79,0 @@ <details><summary>Read more</summary><br> |
+3
-3
| Metadata-Version: 2.4 | ||
| Name: lapx | ||
| Version: 0.7.0 | ||
| Version: 0.7.1 | ||
| Summary: Linear assignment problem solvers | ||
@@ -72,7 +72,7 @@ Home-page: https://github.com/rathaROG/lapx | ||
| `lapx` features the original **`lapjv()`** and **`lapmod()`** functions, and since **v0.6.0**, `lapx` has introduced three additional assignment solvers: | ||
| `lapx` features the original **`lapjv()`** and **`lapmod()`** functions, and since [**v0.6.0**](https://github.com/rathaROG/lapx/releases/tag/v0.6.0), `lapx` has introduced three additional assignment solvers: | ||
| - **`lapjvx()`** and **`lapjvxa()`** ā enhanced versions of [`lap.lapjv()`](https://github.com/gatagat/lap) with more flexible output formats | ||
| - **`lapjvc()`** ā an enhanced version of Christoph Heindlās [`lapsolver.solve_dense()`](https://github.com/cheind/py-lapsolver) with unified output formats | ||
| `lapx` **v0.7.0** has introduced a new function: **`lapjvs()`** ā an enhanced version of Vadim Markovtsevās [`lapjv`](https://github.com/src-d/lapjv), supporting both rectangular and square cost matrices, with flexible output styles. | ||
| `lapx` [**v0.7.0**](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) has introduced a new function: **`lapjvs()`** ā an enhanced version of Vadim Markovtsevās [`lapjv()`](https://github.com/src-d/lapjv), supporting both rectangular and square cost matrices, with flexible output styles. | ||
@@ -79,0 +79,0 @@ <details><summary>Read more</summary><br> |
+2
-2
@@ -21,7 +21,7 @@ <details><summary>š What's new</summary><br> | ||
| `lapx` features the original **`lapjv()`** and **`lapmod()`** functions, and since **v0.6.0**, `lapx` has introduced three additional assignment solvers: | ||
| `lapx` features the original **`lapjv()`** and **`lapmod()`** functions, and since [**v0.6.0**](https://github.com/rathaROG/lapx/releases/tag/v0.6.0), `lapx` has introduced three additional assignment solvers: | ||
| - **`lapjvx()`** and **`lapjvxa()`** ā enhanced versions of [`lap.lapjv()`](https://github.com/gatagat/lap) with more flexible output formats | ||
| - **`lapjvc()`** ā an enhanced version of Christoph Heindlās [`lapsolver.solve_dense()`](https://github.com/cheind/py-lapsolver) with unified output formats | ||
| `lapx` **v0.7.0** has introduced a new function: **`lapjvs()`** ā an enhanced version of Vadim Markovtsevās [`lapjv`](https://github.com/src-d/lapjv), supporting both rectangular and square cost matrices, with flexible output styles. | ||
| `lapx` [**v0.7.0**](https://github.com/rathaROG/lapx/releases/tag/v0.7.0) has introduced a new function: **`lapjvs()`** ā an enhanced version of Vadim Markovtsevās [`lapjv()`](https://github.com/src-d/lapjv), supporting both rectangular and square cost matrices, with flexible output styles. | ||
@@ -28,0 +28,0 @@ <details><summary>Read more</summary><br> |
+95
-64
@@ -10,10 +10,18 @@ #include <functional> | ||
| "This module wraps LAPJVS - Jonker-Volgenant linear sum assignment algorithm (Scalar-only, no AVX2/SIMD)."; | ||
| static char lapjvs_docstring[] = | ||
| "Solves the linear sum assignment problem (Scalar-only)."; | ||
| static char lapjvs_native_docstring[] = | ||
| "Solves the linear sum assignment problem following the input dtype (float32 or float64). Returns (row_ind, col_ind)."; | ||
| static char lapjvs_float32_docstring[] = | ||
| "Solves the linear sum assignment problem in float32 (casts inputs if needed). Returns (row_ind, col_ind)."; | ||
| static PyObject *py_lapjvs(PyObject *self, PyObject *args, PyObject *kwargs); | ||
| static PyObject *py_lapjvs_native(PyObject *self, PyObject *args, PyObject *kwargs); | ||
| static PyObject *py_lapjvs_float32(PyObject *self, PyObject *args, PyObject *kwargs); | ||
| static PyMethodDef module_functions[] = { | ||
| {"lapjvs", reinterpret_cast<PyCFunction>(py_lapjvs), | ||
| METH_VARARGS | METH_KEYWORDS, lapjvs_docstring}, | ||
| // Keep a friendly alias: lapjvs follows native dtype by default | ||
| {"lapjvs", reinterpret_cast<PyCFunction>(py_lapjvs_native), | ||
| METH_VARARGS | METH_KEYWORDS, lapjvs_native_docstring}, | ||
| {"lapjvs_native", reinterpret_cast<PyCFunction>(py_lapjvs_native), | ||
| METH_VARARGS | METH_KEYWORDS, lapjvs_native_docstring}, | ||
| {"lapjvs_float32", reinterpret_cast<PyCFunction>(py_lapjvs_float32), | ||
| METH_VARARGS | METH_KEYWORDS, lapjvs_float32_docstring}, | ||
| {NULL, NULL, 0, NULL} | ||
@@ -64,49 +72,43 @@ }; | ||
| template <typename F> | ||
| static always_inline double call_lap(int dim, const void *restrict cost_matrix, | ||
| bool verbose, | ||
| int *restrict row_ind, int *restrict col_ind, | ||
| void *restrict u, void *restrict v) { | ||
| double lapcost; | ||
| static always_inline void call_lap(int dim, const void *restrict cost_matrix, | ||
| bool verbose, | ||
| int *restrict row_ind, int *restrict col_ind, | ||
| void *restrict v) { | ||
| Py_BEGIN_ALLOW_THREADS | ||
| auto cost_matrix_typed = reinterpret_cast<const F*>(cost_matrix); | ||
| auto u_typed = reinterpret_cast<F*>(u); | ||
| auto v_typed = reinterpret_cast<F*>(v); | ||
| if (verbose) { | ||
| lapcost = lapjvs<true>(dim, cost_matrix_typed, row_ind, col_ind, u_typed, v_typed); | ||
| lapjvs<true>(dim, cost_matrix_typed, row_ind, col_ind, v_typed); | ||
| } else { | ||
| lapcost = lapjvs<false>(dim, cost_matrix_typed, row_ind, col_ind, u_typed, v_typed); | ||
| lapjvs<false>(dim, cost_matrix_typed, row_ind, col_ind, v_typed); | ||
| } | ||
| Py_END_ALLOW_THREADS | ||
| return lapcost; | ||
| } | ||
| static PyObject *py_lapjvs(PyObject *self, PyObject *args, PyObject *kwargs) { | ||
| // Native dtype entry point: lapjvs_native(cost_matrix, verbose=False) | ||
| // - Accepts only float32 or float64 without casting; errors otherwise. | ||
| // - Dispatches to float or double kernel based on the input dtype. | ||
| // - Returns (row_ind, col_ind). | ||
| static PyObject *py_lapjvs_native(PyObject *self, PyObject *args, PyObject *kwargs) { | ||
| PyObject *cost_matrix_obj; | ||
| int verbose = 0; | ||
| int force_doubles = 0; | ||
| int return_original = 0; | ||
| static const char *kwlist[] = { | ||
| "cost_matrix", "verbose", "force_doubles", "return_original", NULL}; | ||
| static const char *kwlist[] = {"cost_matrix", "verbose", NULL}; | ||
| if (!PyArg_ParseTupleAndKeywords( | ||
| args, kwargs, "O|pbb", const_cast<char**>(kwlist), | ||
| &cost_matrix_obj, &verbose, &force_doubles, &return_original)) { | ||
| args, kwargs, "O|p", const_cast<char**>(kwlist), | ||
| &cost_matrix_obj, &verbose)) { | ||
| return NULL; | ||
| } | ||
| // Restore fast default: process as float32 unless force_doubles is set. | ||
| pyarray cost_matrix_array; | ||
| bool float32 = true; | ||
| cost_matrix_array.reset(PyArray_FROM_OTF( | ||
| cost_matrix_obj, NPY_FLOAT32, | ||
| NPY_ARRAY_IN_ARRAY | (force_doubles ? 0 : NPY_ARRAY_FORCECAST))); | ||
| // Ensure array view; do not cast dtype. | ||
| pyarray cost_matrix_array(PyArray_FROM_OTF( | ||
| cost_matrix_obj, NPY_NOTYPE, NPY_ARRAY_IN_ARRAY)); | ||
| if (!cost_matrix_array) { | ||
| PyErr_Clear(); | ||
| float32 = false; | ||
| cost_matrix_array.reset(PyArray_FROM_OTF( | ||
| cost_matrix_obj, NPY_FLOAT64, NPY_ARRAY_IN_ARRAY)); | ||
| if (!cost_matrix_array) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a numpy array of float32 or float64 dtype"); | ||
| return NULL; | ||
| } | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a numpy array"); | ||
| return NULL; | ||
| } | ||
| int typ = PyArray_TYPE(cost_matrix_array.get()); | ||
| if (typ != NPY_FLOAT32 && typ != NPY_FLOAT64) { | ||
| PyErr_SetString(PyExc_TypeError, "\"cost_matrix\" must be float32 or float64 for lapjvs_native()"); | ||
| return NULL; | ||
| } | ||
@@ -137,33 +139,62 @@ auto ndims = PyArray_NDIM(cost_matrix_array.get()); | ||
| double lapcost; | ||
| if (return_original) { | ||
| // Allocate NumPy arrays for u, v only if they are returned. | ||
| pyarray u_array(PyArray_SimpleNew( | ||
| 1, ret_dims, float32? NPY_FLOAT32 : NPY_FLOAT64)); | ||
| pyarray v_array(PyArray_SimpleNew( | ||
| 1, ret_dims, float32? NPY_FLOAT32 : NPY_FLOAT64)); | ||
| auto u = PyArray_DATA(u_array.get()); | ||
| auto v = PyArray_DATA(v_array.get()); | ||
| if (float32) { | ||
| lapcost = call_lap<float>(dim, cost_matrix, verbose, row_ind, col_ind, u, v); | ||
| } else { | ||
| lapcost = call_lap<double>(dim, cost_matrix, verbose, row_ind, col_ind, u, v); | ||
| } | ||
| return Py_BuildValue("(OO(dOO))", | ||
| row_ind_array.get(), col_ind_array.get(), lapcost, | ||
| u_array.get(), v_array.get()); | ||
| if (typ == NPY_FLOAT32) { | ||
| std::unique_ptr<float[]> v(new float[dim]); | ||
| call_lap<float>(dim, cost_matrix, verbose, row_ind, col_ind, v.get()); | ||
| } else { | ||
| // Temporary heap buffers for u, v to avoid NumPy allocation overhead. | ||
| if (float32) { | ||
| std::unique_ptr<float[]> u(new float[dim]); | ||
| std::unique_ptr<float[]> v(new float[dim]); | ||
| lapcost = call_lap<float>(dim, cost_matrix, verbose, row_ind, col_ind, u.get(), v.get()); | ||
| } else { | ||
| std::unique_ptr<double[]> u(new double[dim]); | ||
| std::unique_ptr<double[]> v(new double[dim]); | ||
| lapcost = call_lap<double>(dim, cost_matrix, verbose, row_ind, col_ind, u.get(), v.get()); | ||
| } | ||
| return Py_BuildValue("(dOO)", lapcost, row_ind_array.get(), col_ind_array.get()); | ||
| std::unique_ptr<double[]> v(new double[dim]); | ||
| call_lap<double>(dim, cost_matrix, verbose, row_ind, col_ind, v.get()); | ||
| } | ||
| return Py_BuildValue("(OO)", row_ind_array.get(), col_ind_array.get()); | ||
| } | ||
| // Float32 entry point: lapjvs_float32(cost_matrix, verbose=False) | ||
| // - Casts to float32 if needed, but avoids a copy when already float32. | ||
| // - Returns (row_ind, col_ind). | ||
| static PyObject *py_lapjvs_float32(PyObject *self, PyObject *args, PyObject *kwargs) { | ||
| PyObject *cost_matrix_obj; | ||
| int verbose = 0; | ||
| static const char *kwlist[] = {"cost_matrix", "verbose", NULL}; | ||
| if (!PyArg_ParseTupleAndKeywords( | ||
| args, kwargs, "O|p", const_cast<char**>(kwlist), | ||
| &cost_matrix_obj, &verbose)) { | ||
| return NULL; | ||
| } | ||
| // Allow casting to float32, avoid copy if dtype already matches. | ||
| pyarray cost_matrix_array(PyArray_FROM_OTF( | ||
| cost_matrix_obj, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY | NPY_ARRAY_FORCECAST)); | ||
| if (!cost_matrix_array) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be convertible to float32"); | ||
| return NULL; | ||
| } | ||
| auto ndims = PyArray_NDIM(cost_matrix_array.get()); | ||
| if (ndims != 2) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a square 2D numpy array"); | ||
| return NULL; | ||
| } | ||
| auto dims = PyArray_DIMS(cost_matrix_array.get()); | ||
| if (dims[0] != dims[1]) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\" must be a square 2D numpy array"); | ||
| return NULL; | ||
| } | ||
| int dim = static_cast<int>(dims[0]); | ||
| if (dim <= 0) { | ||
| PyErr_SetString(PyExc_ValueError, "\"cost_matrix\"'s shape is invalid or too large"); | ||
| return NULL; | ||
| } | ||
| auto cost_matrix = PyArray_DATA(cost_matrix_array.get()); | ||
| npy_intp ret_dims[] = {dim, 0}; | ||
| pyarray row_ind_array(PyArray_SimpleNew(1, ret_dims, NPY_INT)); | ||
| pyarray col_ind_array(PyArray_SimpleNew(1, ret_dims, NPY_INT)); | ||
| auto row_ind = reinterpret_cast<int*>(PyArray_DATA(row_ind_array.get())); | ||
| auto col_ind = reinterpret_cast<int*>(PyArray_DATA(col_ind_array.get())); | ||
| std::unique_ptr<float[]> v(new float[dim]); | ||
| call_lap<float>(dim, cost_matrix, verbose, row_ind, col_ind, v.get()); | ||
| return Py_BuildValue("(OO)", row_ind_array.get(), col_ind_array.get()); | ||
| } |
+31
-33
@@ -5,2 +5,3 @@ #include <cassert> | ||
| #include <memory> | ||
| #include <vector> | ||
@@ -59,13 +60,22 @@ #ifdef __GNUC__ | ||
| /// @param colsol out row assigned to column in solution / size dim | ||
| /// @param u out dual variables, row reduction numbers / size dim | ||
| /// @param v out dual variables, column reduction numbers / size dim | ||
| /// @return achieved minimum assignment cost | ||
| /// @param v inout dual variables, column reduction numbers / size dim | ||
| template <bool verbose, typename idx, typename cost> | ||
| cost lapjvs(int dim, const cost *restrict assign_cost, idx *restrict rowsol, | ||
| idx *restrict colsol, cost *restrict u, cost *restrict v) { | ||
| auto collist = std::make_unique<idx[]>(dim); // list of columns to be scanned in various ways. | ||
| auto matches = std::make_unique<idx[]>(dim); // counts how many times a row could be assigned. | ||
| auto d = std::make_unique<cost[]>(dim); // 'cost-distance' in augmenting path calculation. | ||
| auto pred = std::make_unique<idx[]>(dim); // row-predecessor of column in augmenting/alternating path. | ||
| void lapjvs(int dim, const cost *restrict assign_cost, idx *restrict rowsol, | ||
| idx *restrict colsol, cost *restrict v) { | ||
| // Reuse per-thread buffers to avoid per-call allocations | ||
| static thread_local std::vector<idx> collist_vec; | ||
| static thread_local std::vector<idx> matches_vec; | ||
| static thread_local std::vector<idx> pred_vec; | ||
| static thread_local std::vector<cost> d_vec; | ||
| if ((int)collist_vec.size() < dim) collist_vec.resize(dim); | ||
| if ((int)matches_vec.size() < dim) matches_vec.resize(dim); | ||
| if ((int)pred_vec.size() < dim) pred_vec.resize(dim); | ||
| if ((int)d_vec.size() < dim) d_vec.resize(dim); | ||
| idx *restrict collist = collist_vec.data(); // list of columns to be scanned. | ||
| idx *restrict matches = matches_vec.data(); // counts how many times a row could be assigned. | ||
| cost *restrict d = d_vec.data(); // 'cost-distance' in augmenting path calculation. | ||
| idx *restrict pred = pred_vec.data(); // row-predecessor of column in augmenting/alternating path. | ||
| // init how many times a row will be assigned in the column reduction. | ||
@@ -91,3 +101,3 @@ for (idx i = 0; i < dim; i++) { | ||
| if (++matches[imin] == 1) { | ||
| // init assignment if minimum row assigned for first time. | ||
| // init assignment if minimum row assigned for the first time. | ||
| rowsol[imin] = j; | ||
@@ -104,3 +114,3 @@ colsol[j] = imin; | ||
| // REDUCTION TRANSFER | ||
| auto free = matches.get(); // list of unassigned rows. | ||
| idx *restrict free_rows = matches; // list of unassigned rows (reuse matches' storage). | ||
| idx numfree = 0; | ||
@@ -110,4 +120,4 @@ for (idx i = 0; i < dim; i++) { | ||
| if (matches[i] == 0) { // fill list of unassigned 'free' rows. | ||
| free[numfree++] = i; | ||
| } else if (matches[i] == 1) { // transfer reduction from rows that are assigned once. | ||
| free_rows[numfree++] = i; | ||
| } else if (matches[i] == 1) { // transfer reduction from rows assigned once. | ||
| idx j1 = rowsol[i]; | ||
@@ -117,5 +127,4 @@ cost min = std::numeric_limits<cost>::max(); | ||
| if (j != j1) { | ||
| if (local_cost[j] - v[j] < min) { | ||
| min = local_cost[j] - v[j]; | ||
| } | ||
| cost cand = local_cost[j] - v[j]; | ||
| if (cand < min) min = cand; | ||
| } | ||
@@ -136,3 +145,3 @@ } | ||
| while (k < prevnumfree) { | ||
| idx i = free[k++]; | ||
| idx i = free_rows[k++]; | ||
@@ -159,5 +168,5 @@ // find minimum and second minimum reduced cost over columns. | ||
| if (vj1_lowers) { | ||
| free[--k] = i0; | ||
| free_rows[--k] = i0; | ||
| } else { | ||
| free[numfree++] = i0; | ||
| free_rows[numfree++] = i0; | ||
| } | ||
@@ -174,3 +183,3 @@ } | ||
| idx endofpath; | ||
| idx freerow = free[f]; // start row of augmenting path. | ||
| idx freerow = free_rows[f]; // start row of augmenting path. | ||
| if (verbose) { | ||
@@ -265,15 +274,4 @@ printf("lapjvs: AUGMENT SOLUTION row %d [%d / %d]\n", | ||
| // calculate optimal cost. | ||
| cost lapcost = 0; | ||
| for (idx i = 0; i < dim; i++) { | ||
| const cost *local_cost = &assign_cost[i * dim]; | ||
| idx j = rowsol[i]; | ||
| u[i] = local_cost[j] - v[j]; | ||
| lapcost += local_cost[j]; | ||
| } | ||
| if (verbose) { | ||
| printf("lapjvs: optimal cost calculated\n"); | ||
| } | ||
| return lapcost; | ||
| // Final cost and row duals (u) are not computed here anymore, since the Python | ||
| // wrapper recomputes the total cost from the original input for numeric parity. | ||
| } |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
1465554
0.26%605
-2.1%