You're Invited: Meet the Socket Team at RSAC and BSidesSF 2026, March 23–26. RSVP
Socket
Book a DemoSign in
Socket

linopy

Package Overview
Dependencies
Maintainers
2
Versions
64
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

linopy - pypi Package Compare versions

Comparing version
0.6.1
to
0.6.2
+639
benchmark/benchmark_auto_mask.py
#!/usr/bin/env python3
"""
Benchmark comparing manual masking vs auto_mask for models with NaN coefficients.
This creates a realistic scenario: a multi-period dispatch model where:
- Not all generators are available in all time periods (NaN in capacity bounds)
- Not all transmission lines exist between all regions (NaN in flow limits)
"""
import sys
from pathlib import Path
# Ensure we use the local linopy installation
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
import time # noqa: E402
from typing import Any # noqa: E402
import numpy as np # noqa: E402
import pandas as pd # noqa: E402
from linopy import GREATER_EQUAL, Model # noqa: E402
def create_nan_data(
    n_generators: int = 500,
    n_periods: int = 100,
    n_regions: int = 20,
    nan_fraction_gen: float = 0.3,
    nan_fraction_lines: float = 0.7,
    seed: int = 42,
) -> dict[str, Any]:
    """Create realistic benchmark input data containing NaN patterns.

    NaN entries encode sparsity: a NaN generator capacity means the
    generator is unavailable in that period (maintenance etc.), and a
    NaN line capacity means no direct transmission line exists between
    the two regions.

    Parameters
    ----------
    n_generators, n_periods, n_regions : int
        Problem dimensions.
    nan_fraction_gen : float
        Expected fraction of generator-period combinations set to NaN.
    nan_fraction_lines : float
        Expected fraction of region pairs without a line (kept NaN).
    seed : int
        Seed for the random generator, making the data reproducible.

    Returns
    -------
    dict[str, Any]
        Indexes, capacities, costs, demand and line capacities keyed by
        name, ready for the ``build_model_*`` functions.
    """
    random_state = np.random.default_rng(seed)

    gen_idx = pd.Index(range(n_generators), name="generator")
    period_idx = pd.Index(range(n_periods), name="period")
    region_idx = pd.Index(range(n_regions), name="region")

    # Per-generator, per-period capacity; selected entries become NaN below.
    gen_capacity = pd.DataFrame(
        random_state.uniform(50, 500, size=(n_generators, n_periods)),
        index=gen_idx,
        columns=period_idx,
    )
    # Mark a random subset of generator-period slots as unavailable.
    unavailable = random_state.random((n_generators, n_periods)) < nan_fraction_gen
    gen_capacity.values[unavailable] = np.nan

    gen_cost = pd.Series(random_state.uniform(10, 100, n_generators), index=gen_idx)
    gen_region = pd.Series(
        random_state.integers(0, n_regions, n_generators), index=gen_idx
    )

    demand = pd.DataFrame(
        random_state.uniform(100, 1000, size=(n_regions, n_periods)),
        index=region_idx,
        columns=period_idx,
    )

    # Sparse transmission network. Distinct dimension names sidestep
    # xarray's duplicate-dimension restriction; start fully NaN (= no line).
    regions_from = pd.Index(range(n_regions), name="region_from")
    regions_to = pd.Index(range(n_regions), name="region_to")
    line_capacity = pd.DataFrame(
        np.nan, index=regions_from, columns=regions_to, dtype=float
    )
    # Randomly connect a subset of directed region pairs (no self-loops).
    for src in range(n_regions):
        for dst in range(n_regions):
            if src != dst and random_state.random() > nan_fraction_lines:
                line_capacity.loc[src, dst] = random_state.uniform(100, 500)

    return {
        "generators": gen_idx,
        "periods": period_idx,
        "regions": region_idx,
        "regions_from": regions_from,
        "regions_to": regions_to,
        "gen_capacity": gen_capacity,
        "gen_cost": gen_cost,
        "gen_region": gen_region,
        "demand": demand,
        "line_capacity": line_capacity,
    }
def build_model_manual_mask(data: dict[str, Any]) -> Model:
    """Build the dispatch model with explicitly constructed masks.

    Represents the traditional workflow: the caller derives a boolean
    mask from the NaN pattern of the input data and passes it to
    ``add_variables`` alongside the bounds.
    """
    m = Model()

    gen_capacity = data["gen_capacity"]
    line_capacity = data["line_capacity"]

    # Dispatch variables: mask out generator-period slots with NaN capacity.
    dispatch = m.add_variables(
        lower=0,
        upper=gen_capacity,
        coords=[data["generators"], data["periods"]],
        name="dispatch",
        mask=gen_capacity.notnull(),
    )

    # Flow variables: mask out region pairs without a transmission line.
    flow = m.add_variables(
        lower=-line_capacity.abs(),
        upper=line_capacity.abs(),
        coords=[data["regions_from"], data["regions_to"]],
        name="flow",
        mask=line_capacity.notnull(),
    )

    gen_region = data["gen_region"]
    demand = data["demand"]
    # Per-region energy balance (vectorized over periods): local generation
    # plus net imports must cover demand.
    for r in data["regions"]:
        local_gens = data["generators"][gen_region == r]
        generation = dispatch.loc[local_gens, :].sum("generator")
        imports = flow.loc[:, r].sum("region_from")
        exports = flow.loc[r, :].sum("region_to")
        m.add_constraints(
            generation + imports - exports,
            GREATER_EQUAL,
            demand.loc[r],
            name=f"balance_r{r}",
        )

    # Minimize total generation cost.
    m.add_objective((dispatch * data["gen_cost"]).sum())
    return m
def build_model_auto_mask(data: dict[str, Any]) -> Model:
    """Build the dispatch model relying on ``auto_mask=True``.

    Same structure as the manual-mask variant, but NaN entries in the
    bounds are masked automatically by the model — no explicit mask
    arrays are constructed here.
    """
    m = Model(auto_mask=True)

    gen_capacity = data["gen_capacity"]
    line_capacity = data["line_capacity"]

    # Dispatch variables: NaN capacities are auto-masked by the model.
    dispatch = m.add_variables(
        lower=0,
        upper=gen_capacity,
        coords=[data["generators"], data["periods"]],
        name="dispatch",
    )

    # Flow variables: NaN line capacities are auto-masked by the model.
    flow = m.add_variables(
        lower=-line_capacity.abs(),
        upper=line_capacity.abs(),
        coords=[data["regions_from"], data["regions_to"]],
        name="flow",
    )

    gen_region = data["gen_region"]
    demand = data["demand"]
    # Per-region energy balance (vectorized over periods): local generation
    # plus net imports must cover demand.
    for r in data["regions"]:
        local_gens = data["generators"][gen_region == r]
        generation = dispatch.loc[local_gens, :].sum("generator")
        imports = flow.loc[:, r].sum("region_from")
        exports = flow.loc[r, :].sum("region_to")
        m.add_constraints(
            generation + imports - exports,
            GREATER_EQUAL,
            demand.loc[r],
            name=f"balance_r{r}",
        )

    # Minimize total generation cost.
    m.add_objective((dispatch * data["gen_cost"]).sum())
    return m
def build_model_no_mask(data: dict[str, Any]) -> Model:
    """Build the dispatch model WITHOUT any masking.

    NaN values remain in the variable bounds; this serves as the
    baseline for measuring the overhead that masking adds.
    """
    m = Model()

    gen_capacity = data["gen_capacity"]
    line_capacity = data["line_capacity"]

    # Dispatch variables: NaN bounds are deliberately left in place.
    dispatch = m.add_variables(
        lower=0,
        upper=gen_capacity,
        coords=[data["generators"], data["periods"]],
        name="dispatch",
    )

    # Flow variables: NaN bounds are deliberately left in place.
    flow = m.add_variables(
        lower=-line_capacity.abs(),
        upper=line_capacity.abs(),
        coords=[data["regions_from"], data["regions_to"]],
        name="flow",
    )

    gen_region = data["gen_region"]
    demand = data["demand"]
    # Per-region energy balance (vectorized over periods): local generation
    # plus net imports must cover demand.
    for r in data["regions"]:
        local_gens = data["generators"][gen_region == r]
        generation = dispatch.loc[local_gens, :].sum("generator")
        imports = flow.loc[:, r].sum("region_from")
        exports = flow.loc[r, :].sum("region_to")
        m.add_constraints(
            generation + imports - exports,
            GREATER_EQUAL,
            demand.loc[r],
            name=f"balance_r{r}",
        )

    # Minimize total generation cost.
    m.add_objective((dispatch * data["gen_cost"]).sum())
    return m
def benchmark(
    n_generators: int = 500,
    n_periods: int = 100,
    n_regions: int = 20,
    n_runs: int = 3,
    solve: bool = True,
) -> dict[str, Any]:
    """Run benchmark comparing no masking, manual masking, and auto masking.

    Builds the same dispatch model three ways (no mask as baseline,
    manual mask, auto mask), timing each build ``n_runs`` times; then
    compares LP-file write time and size, and — if ``solve`` is True —
    solve time and objective values via HiGHS.

    Parameters
    ----------
    n_generators, n_periods, n_regions : int
        Problem dimensions forwarded to ``create_nan_data``.
    n_runs : int
        Number of timed model-build repetitions per variant.
    solve : bool
        Whether to additionally solve the manual/auto models once each.

    Returns
    -------
    dict[str, Any]
        Timings and model sizes, consumed by ``print_summary_table``.
    """
    print("=" * 70)
    print("BENCHMARK: No Masking vs Manual Masking vs Auto-Masking")
    print("=" * 70)
    print("\nModel size:")
    print(f" - Generators: {n_generators}")
    print(f" - Time periods: {n_periods}")
    print(f" - Regions: {n_regions}")
    print(f" - Potential dispatch vars: {n_generators * n_periods:,}")
    print(f" - Potential flow vars: {n_regions * n_regions:,}")
    print(f"\nRunning {n_runs} iterations each...\n")
    # Generate data once so all three variants see identical input.
    data = create_nan_data(
        n_generators=n_generators,
        n_periods=n_periods,
        n_regions=n_regions,
    )
    # Count NaN entries to report the sparsity of the generated data.
    gen_nan_count = data["gen_capacity"].isna().sum().sum()
    gen_total = data["gen_capacity"].size
    line_nan_count = data["line_capacity"].isna().sum().sum()
    line_total = data["line_capacity"].size
    print("NaN statistics:")
    print(
        f" - Generator capacity: {gen_nan_count:,}/{gen_total:,} "
        f"({100 * gen_nan_count / gen_total:.1f}% NaN)"
    )
    print(
        f" - Line capacity: {line_nan_count:,}/{line_total:,} "
        f"({100 * line_nan_count / line_total:.1f}% NaN)"
    )
    print()
    # Benchmark NO masking (baseline)
    no_mask_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m_no_mask = build_model_no_mask(data)
        elapsed = time.perf_counter() - start
        no_mask_times.append(elapsed)
        if i == 0:
            # Can't use nvars directly as it will fail with NaN values
            # Instead count total variable labels (including those with NaN bounds)
            no_mask_nvars = sum(
                m_no_mask.variables[k].labels.size for k in m_no_mask.variables
            )
            no_mask_ncons = m_no_mask.ncons
    # Benchmark manual masking
    manual_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m_manual = build_model_manual_mask(data)
        elapsed = time.perf_counter() - start
        manual_times.append(elapsed)
        if i == 0:
            manual_nvars = m_manual.nvars
            manual_ncons = m_manual.ncons
    # Benchmark auto masking
    auto_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m_auto = build_model_auto_mask(data)
        elapsed = time.perf_counter() - start
        auto_times.append(elapsed)
        if i == 0:
            auto_nvars = m_auto.nvars
            auto_ncons = m_auto.ncons
    # Results
    print("-" * 70)
    print("RESULTS: Model Building Time")
    print("-" * 70)
    print("\nNo masking (baseline):")
    print(f" - Mean time: {np.mean(no_mask_times):.3f}s")
    print(f" - Variables: {no_mask_nvars:,} (includes NaN-bounded vars)")
    print(f" - Constraints: {no_mask_ncons:,}")
    print("\nManual masking:")
    print(f" - Mean time: {np.mean(manual_times):.3f}s")
    print(f" - Variables: {manual_nvars:,}")
    print(f" - Constraints: {manual_ncons:,}")
    manual_overhead = np.mean(manual_times) - np.mean(no_mask_times)
    print(f" - Overhead vs no-mask: {manual_overhead * 1000:+.1f}ms")
    print("\nAuto masking:")
    print(f" - Mean time: {np.mean(auto_times):.3f}s")
    print(f" - Variables: {auto_nvars:,}")
    print(f" - Constraints: {auto_ncons:,}")
    auto_overhead = np.mean(auto_times) - np.mean(no_mask_times)
    print(f" - Overhead vs no-mask: {auto_overhead * 1000:+.1f}ms")
    # Comparison
    print("\nComparison (Auto vs Manual):")
    speedup = np.mean(manual_times) / np.mean(auto_times)
    diff = np.mean(auto_times) - np.mean(manual_times)
    if speedup > 1:
        print(f" - Auto-mask is {speedup:.2f}x FASTER than manual")
    else:
        print(f" - Auto-mask is {1 / speedup:.2f}x SLOWER than manual")
    print(f" - Time difference: {diff * 1000:+.1f}ms")
    # Verify models are equivalent
    print("\nVerification:")
    print(f" - Manual == Auto variables: {manual_nvars == auto_nvars}")
    print(f" - Manual == Auto constraints: {manual_ncons == auto_ncons}")
    print(f" - Variables masked out: {no_mask_nvars - manual_nvars:,}")
    results = {
        "n_generators": n_generators,
        "n_periods": n_periods,
        "potential_vars": n_generators * n_periods,
        "no_mask_time": np.mean(no_mask_times),
        "manual_time": np.mean(manual_times),
        "auto_time": np.mean(auto_times),
        "nvars": manual_nvars,
        "masked_out": no_mask_nvars - manual_nvars,
    }
    # LP file write benchmark
    print("\n" + "-" * 70)
    print("RESULTS: LP File Write Time & Size")
    print("-" * 70)
    import os
    import tempfile
    # Write LP file for manual masked model.
    # delete=False so the path survives the context manager; unlinked below.
    with tempfile.NamedTemporaryFile(suffix=".lp", delete=False) as f:
        manual_lp_path = f.name
    start = time.perf_counter()
    m_manual.to_file(manual_lp_path)
    manual_write_time = time.perf_counter() - start
    manual_lp_size = os.path.getsize(manual_lp_path) / (1024 * 1024)  # MB
    os.unlink(manual_lp_path)
    # Write LP file for auto masked model
    with tempfile.NamedTemporaryFile(suffix=".lp", delete=False) as f:
        auto_lp_path = f.name
    start = time.perf_counter()
    m_auto.to_file(auto_lp_path)
    auto_write_time = time.perf_counter() - start
    auto_lp_size = os.path.getsize(auto_lp_path) / (1024 * 1024)  # MB
    os.unlink(auto_lp_path)
    print("\nManual masking:")
    print(f" - Write time: {manual_write_time:.3f}s")
    print(f" - File size: {manual_lp_size:.2f} MB")
    print("\nAuto masking:")
    print(f" - Write time: {auto_write_time:.3f}s")
    print(f" - File size: {auto_lp_size:.2f} MB")
    # NOTE(review): compares file *sizes* within 0.01 MB, not contents —
    # "identical" here is a size-based proxy, not a byte comparison.
    print(f"\nFiles identical: {abs(manual_lp_size - auto_lp_size) < 0.01}")
    results["manual_write_time"] = manual_write_time
    results["auto_write_time"] = auto_write_time
    results["lp_size_mb"] = manual_lp_size
    # Quick solve comparison (single run each; requires HiGHS installed)
    if solve:
        print("\n" + "-" * 70)
        print("RESULTS: Solve Time (single run)")
        print("-" * 70)
        start = time.perf_counter()
        m_manual.solve("highs", io_api="direct")
        manual_solve = time.perf_counter() - start
        start = time.perf_counter()
        m_auto.solve("highs", io_api="direct")
        auto_solve = time.perf_counter() - start
        print(f"\nManual masking solve: {manual_solve:.3f}s")
        print(f"Auto masking solve: {auto_solve:.3f}s")
        if m_manual.objective.value is not None and m_auto.objective.value is not None:
            print(
                f"Objective values match: "
                f"{np.isclose(m_manual.objective.value, m_auto.objective.value)}"
            )
            print(f" - Manual: {m_manual.objective.value:.2f}")
            print(f" - Auto: {m_auto.objective.value:.2f}")
    return results
def benchmark_code_simplicity() -> None:
    """Print a qualitative, side-by-side comparison of the two styles.

    No timing here — this only illustrates the boilerplate that
    ``auto_mask`` removes.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("CODE COMPARISON: Manual vs Auto-Mask")
    print(banner)

    snippet_manual = """
# Manual masking - must create mask explicitly
gen_mask = gen_capacity.notnull()
dispatch = m.add_variables(
lower=0,
upper=gen_capacity,
coords=[generators, periods],
name="dispatch",
mask=gen_mask, # Extra step required
)
"""
    snippet_auto = """
# Auto masking - just pass the data with NaN
m = Model(auto_mask=True)
dispatch = m.add_variables(
lower=0,
upper=gen_capacity, # NaN auto-masked
coords=[generators, periods],
name="dispatch",
)
"""
    print("\nManual masking approach:")
    print(snippet_manual)
    print("Auto-mask approach:")
    print(snippet_auto)
    print("Benefits of auto_mask:")
    for benefit in (
        " - Less boilerplate code",
        " - No need to manually track which arrays have NaN",
        " - Reduces risk of forgetting to mask",
        " - Cleaner, more declarative style",
    ):
        print(benefit)
def benchmark_constraint_masking(n_runs: int = 3) -> None:
    """Benchmark auto-masking of constraints with NaN in RHS.

    Builds a dense synthetic constraint block where ~30% of RHS values
    are NaN and compares construction time between supplying the mask
    manually and letting ``Model(auto_mask=True)`` derive it.

    Parameters
    ----------
    n_runs : int
        Number of timed repetitions per variant.
    """
    print("\n" + "=" * 70)
    print("BENCHMARK: Constraint Auto-Masking (NaN in RHS)")
    print("=" * 70)
    # Fixed problem dimensions and NaN density for the synthetic RHS.
    n_vars = 1000
    n_constraints = 5000
    nan_fraction = 0.3
    rng = np.random.default_rng(42)
    idx = pd.Index(range(n_vars), name="i")
    con_idx = pd.Index(range(n_constraints), name="c")
    # Create RHS with NaN values
    rhs = pd.Series(rng.uniform(1, 100, n_constraints), index=con_idx)
    nan_mask = rng.random(n_constraints) < nan_fraction
    rhs.values[nan_mask] = np.nan
    print("\nModel size:")
    print(f" - Variables: {n_vars}")
    print(f" - Potential constraints: {n_constraints}")
    print(f" - NaN in RHS: {nan_mask.sum()} ({100 * nan_fraction:.0f}%)")
    print(f"\nRunning {n_runs} iterations each...\n")
    # Manual masking
    manual_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m = Model()
        x = m.add_variables(lower=0, coords=[idx], name="x")
        # Coefficient matrix is regenerated inside the loop so both
        # variants time the same amount of data construction.
        coeffs = pd.DataFrame(
            rng.uniform(0.1, 1, (n_constraints, n_vars)), index=con_idx, columns=idx
        )
        con_mask = rhs.notnull()  # Manual mask creation
        m.add_constraints((coeffs * x).sum("i"), GREATER_EQUAL, rhs, mask=con_mask)
        m.add_objective(x.sum())
        elapsed = time.perf_counter() - start
        manual_times.append(elapsed)
        if i == 0:
            manual_ncons = m.ncons
    # Auto masking
    auto_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m = Model(auto_mask=True)
        x = m.add_variables(lower=0, coords=[idx], name="x")
        coeffs = pd.DataFrame(
            rng.uniform(0.1, 1, (n_constraints, n_vars)), index=con_idx, columns=idx
        )
        m.add_constraints((coeffs * x).sum("i"), GREATER_EQUAL, rhs)  # No mask needed
        m.add_objective(x.sum())
        elapsed = time.perf_counter() - start
        auto_times.append(elapsed)
        if i == 0:
            auto_ncons = m.ncons
    print("-" * 70)
    print("RESULTS: Constraint Building Time")
    print("-" * 70)
    print("\nManual masking:")
    print(f" - Mean time: {np.mean(manual_times):.3f}s")
    print(f" - Active constraints: {manual_ncons:,}")
    print("\nAuto masking:")
    print(f" - Mean time: {np.mean(auto_times):.3f}s")
    print(f" - Active constraints: {auto_ncons:,}")
    overhead = np.mean(auto_times) - np.mean(manual_times)
    print(f"\nOverhead: {overhead * 1000:.1f}ms")
    print(f"Same constraint count: {manual_ncons == auto_ncons}")
def print_summary_table(results: list[dict[str, Any]]) -> None:
    """Render all collected benchmark results as one aligned text table.

    Parameters
    ----------
    results : list[dict[str, Any]]
        Dicts as returned by ``benchmark``; LP-write keys are optional
        and default to 0 when absent.
    """
    bar = "=" * 110
    rule = "-" * 110
    print("\n" + bar)
    print("SUMMARY TABLE: Model Building & LP Write Times")
    print(bar)
    header = (
        f"{'Model':<12} {'Pot.Vars':>10} {'Act.Vars':>10} {'Masked':>8} "
        f"{'No-Mask':>9} {'Manual':>9} {'Auto':>9} {'Diff':>8} "
        f"{'LP Write':>9} {'LP Size':>9}"
    )
    print(header)
    print(rule)
    for entry in results:
        label = f"{entry['n_generators']}x{entry['n_periods']}"
        write_ms = entry.get("manual_write_time", 0) * 1000
        size_mb = entry.get("lp_size_mb", 0)
        print(
            f"{label:<12} {entry['potential_vars']:>10,} {entry['nvars']:>10,} "
            f"{entry['masked_out']:>8,} {entry['no_mask_time'] * 1000:>8.0f}ms "
            f"{entry['manual_time'] * 1000:>8.0f}ms {entry['auto_time'] * 1000:>8.0f}ms "
            f"{(entry['auto_time'] - entry['manual_time']) * 1000:>+7.0f}ms "
            f"{write_ms:>8.0f}ms {size_mb:>8.1f}MB"
        )
    print(rule)
    for legend_line in (
        "Pot.Vars = Potential variables, Act.Vars = Active (non-masked) variables",
        "Masked = Variables masked out due to NaN bounds",
        "Diff = Auto-mask time minus Manual mask time (negative = faster)",
        "LP Write = Time to write LP file, LP Size = LP file size in MB",
    ):
        print(legend_line)
if __name__ == "__main__":
    # Collected per-size result dicts, summarized at the end.
    all_results = []
    # Run benchmarks with different sizes (solving disabled: only model
    # construction and LP writing are compared here).
    print("\n### SMALL MODEL ###")
    all_results.append(
        benchmark(n_generators=100, n_periods=50, n_regions=10, n_runs=5, solve=False)
    )
    print("\n\n### MEDIUM MODEL ###")
    all_results.append(
        benchmark(n_generators=500, n_periods=100, n_regions=20, n_runs=3, solve=False)
    )
    print("\n\n### LARGE MODEL ###")
    all_results.append(
        benchmark(n_generators=1000, n_periods=200, n_regions=30, n_runs=3, solve=False)
    )
    print("\n\n### VERY LARGE MODEL ###")
    all_results.append(
        benchmark(n_generators=2000, n_periods=500, n_regions=40, n_runs=3, solve=False)
    )
    print("\n\n### EXTRA LARGE MODEL ###")
    all_results.append(
        benchmark(n_generators=5000, n_periods=500, n_regions=50, n_runs=2, solve=False)
    )
    # Print summary table
    print_summary_table(all_results)
    # Run constraint benchmark
    benchmark_constraint_masking()
    # Show code comparison
    benchmark_code_simplicity()
+4
-0

@@ -45,4 +45,8 @@ # Configuration file for the Sphinx documentation builder.

"sphinx.ext.imgconverter", # for SVG conversion
"sphinx_copybutton",
]
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: "
copybutton_prompt_is_regexp = True
# Add any paths that contain templates here, relative to this directory.

@@ -49,0 +53,0 @@ templates_path = ["_templates"]

Release Notes
=============
.. Upcoming Version
Upcoming Version
----------------
Version 0.6.2
--------------
**Features**
* Add ``auto_mask`` parameter to ``Model`` class that automatically masks variables and constraints where bounds, coefficients, or RHS values contain NaN. This eliminates the need to manually create mask arrays when working with sparse or incomplete data.
**Performance**
* Speed up LP file writing by 2-2.7x on large models through Polars streaming engine, join-based constraint assembly, and reduced per-constraint overhead
**Bug Fixes**
* Fix multiplication of constant-only ``LinearExpression`` with other expressions
* Fix docs and Gurobi license handling
Version 0.6.1

@@ -7,0 +24,0 @@ --------------

+3
-2
Metadata-Version: 2.4
Name: linopy
Version: 0.6.1
Version: 0.6.2
Summary: Linear optimization with N-D labeled arrays in Python

@@ -30,3 +30,3 @@ Author-email: Fabian Hofmann <fabianmarikhofmann@gmail.com>

Requires-Dist: dask>=0.18.0
Requires-Dist: polars
Requires-Dist: polars>=1.31
Requires-Dist: tqdm

@@ -43,2 +43,3 @@ Requires-Dist: deprecation

Requires-Dist: sphinx_book_theme==1.1.3; extra == "docs"
Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
Requires-Dist: nbsphinx==0.9.4; extra == "docs"

@@ -45,0 +46,0 @@ Requires-Dist: nbsphinx-link==1.3.0; extra == "docs"

@@ -7,3 +7,3 @@ scipy

dask>=0.18.0
polars
polars>=1.31
tqdm

@@ -39,2 +39,3 @@ deprecation

sphinx_book_theme==1.1.3
sphinx-copybutton==0.5.2
nbsphinx==0.9.4

@@ -41,0 +42,0 @@ nbsphinx-link==1.3.0

@@ -28,2 +28,3 @@ .git-blame-ignore-revs

benchmark/benchmark-overhead.png
benchmark/benchmark_auto_mask.py
benchmark/benchmark_resource-absolute.pdf

@@ -30,0 +31,0 @@ benchmark/benchmark_resource-absolute.png

@@ -452,2 +452,21 @@ #!/usr/bin/env python3

def maybe_group_terms_polars(df: pl.DataFrame) -> pl.DataFrame:
    """
    Group terms only if there are duplicate (labels, vars) pairs.
    This avoids the expensive group_by operation when terms already
    reference distinct variables (e.g. ``x - y`` has ``_term=2`` but
    no duplicates). When skipping, columns are reordered to match the
    output of ``group_terms_polars``.
    """
    # All variable-label columns (more than one may exist, e.g. for
    # quadratic terms with "vars1"/"vars2"-style columns).
    varcols = [c for c in df.columns if c.startswith("vars")]
    keys = [c for c in ["labels"] + varcols if c in df.columns]
    # Cheap duplicate probe: one pass counting distinct key tuples.
    key_count = df.select(pl.struct(keys).n_unique()).item()
    if key_count < df.height:
        # Duplicates present -> fall back to the full aggregation.
        return group_terms_polars(df)
    # Match column order of group_terms (group-by keys, coeffs, rest)
    rest = [c for c in df.columns if c not in keys and c != "coeffs"]
    return df.select(keys + ["coeffs"] + rest)
def save_join(*dataarrays: DataArray, integer_dtype: bool = False) -> Dataset:

@@ -454,0 +473,0 @@ """

@@ -43,6 +43,5 @@ """

get_label_position,
group_terms_polars,
has_optimized_model,
infer_schema_polars,
iterate_slices,
maybe_group_terms_polars,
maybe_replace_signs,

@@ -626,17 +625,34 @@ print_coord,

long = filter_nulls_polars(long)
long = group_terms_polars(long)
if ds.sizes.get("_term", 1) > 1:
long = maybe_group_terms_polars(long)
check_has_nulls_polars(long, name=f"{self.type} {self.name}")
short_ds = ds[[k for k in ds if "_term" not in ds[k].dims]]
schema = infer_schema_polars(short_ds)
schema["sign"] = pl.Enum(["=", "<=", ">="])
short = to_polars(short_ds, schema=schema)
short = filter_nulls_polars(short)
check_has_nulls_polars(short, name=f"{self.type} {self.name}")
# Build short DataFrame (labels, rhs, sign) without xarray broadcast.
# Apply labels mask directly instead of filter_nulls_polars.
labels_flat = ds["labels"].values.reshape(-1)
mask = labels_flat != -1
labels_masked = labels_flat[mask]
rhs_flat = np.broadcast_to(ds["rhs"].values, ds["labels"].shape).reshape(-1)
df = pl.concat([short, long], how="diagonal_relaxed").sort(["labels", "rhs"])
# delete subsequent non-null rhs (happens if all vars per label are -1)
is_non_null = df["rhs"].is_not_null()
prev_non_is_null = is_non_null.shift(1).fill_null(False)
df = df.filter(is_non_null & ~prev_non_is_null | ~is_non_null)
sign_values = ds["sign"].values
sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1)
all_same_sign = len(sign_flat) > 0 and (
sign_flat[0] == sign_flat[-1] and (sign_flat[0] == sign_flat).all()
)
short_data: dict = {
"labels": labels_masked,
"rhs": rhs_flat[mask],
}
if all_same_sign:
short = pl.DataFrame(short_data).with_columns(
pl.lit(sign_flat[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign")
)
else:
short_data["sign"] = pl.Series(
"sign", sign_flat[mask], dtype=pl.Enum(["=", "<=", ">="])
)
short = pl.DataFrame(short_data)
df = long.join(short, on="labels", how="inner")
return df[["labels", "coeffs", "vars", "sign", "rhs"]]

@@ -643,0 +659,0 @@

@@ -57,2 +57,17 @@ #!/usr/bin/env python3

def _format_and_write(
    df: pl.DataFrame, columns: list[pl.Expr], f: BufferedWriter
) -> None:
    """
    Format columns via concat_str and write to file.
    Uses Polars streaming engine for better memory efficiency.
    """
    # Lazy select + streaming collect keeps peak memory low on large
    # frames; null_value="" plus ignore_nulls drops absent fragments.
    df.lazy().select(pl.concat_str(columns, ignore_nulls=True)).collect(
        engine="streaming"
    ).write_csv(
        f, separator=" ", null_value="", quote_style="never", include_header=False
    )
def signed_number(expr: pl.Expr) -> tuple[pl.Expr, pl.Expr]:

@@ -159,6 +174,3 @@ """

]
df = df.select(pl.concat_str(cols, ignore_nulls=True))
df.write_csv(
f, separator=" ", null_value="", quote_style="never", include_header=False
)
_format_and_write(df, cols, f)

@@ -176,6 +188,3 @@

f.write(b"+ [\n")
df = df.select(pl.concat_str(cols, ignore_nulls=True))
df.write_csv(
f, separator=" ", null_value="", quote_style="never", include_header=False
)
_format_and_write(df, cols, f)
f.write(b"] / 2\n")

@@ -260,7 +269,3 @@

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -303,7 +308,3 @@

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -347,7 +348,3 @@

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -408,7 +405,3 @@

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -450,27 +443,6 @@

# Ensure each constraint has both coefficient and RHS terms
analysis = df.group_by("labels").agg(
[
pl.col("coeffs").is_not_null().sum().alias("coeff_rows"),
pl.col("sign").is_not_null().sum().alias("rhs_rows"),
]
)
valid = analysis.filter(
(pl.col("coeff_rows") > 0) & (pl.col("rhs_rows") > 0)
)
if valid.height == 0:
continue
# Keep only constraints that have both parts
df = df.join(valid.select("labels"), on="labels", how="inner")
# Sort by labels and mark first/last occurrences
df = df.sort("labels").with_columns(
[
pl.when(pl.col("labels").is_first_distinct())
.then(pl.col("labels"))
.otherwise(pl.lit(None))
.alias("labels_first"),
pl.col("labels").is_first_distinct().alias("is_first_in_group"),
(pl.col("labels") != pl.col("labels").shift(-1))

@@ -482,13 +454,12 @@ .fill_null(True)

row_labels = print_constraint(pl.col("labels_first"))
row_labels = print_constraint(pl.col("labels"))
col_labels = print_variable(pl.col("vars"))
columns = [
pl.when(pl.col("labels_first").is_not_null()).then(row_labels[0]),
pl.when(pl.col("labels_first").is_not_null()).then(row_labels[1]),
pl.when(pl.col("labels_first").is_not_null())
.then(pl.lit(":\n"))
.alias(":"),
pl.when(pl.col("is_first_in_group")).then(row_labels[0]),
pl.when(pl.col("is_first_in_group")).then(row_labels[1]),
pl.when(pl.col("is_first_in_group")).then(pl.lit(":\n")).alias(":"),
*signed_number(pl.col("coeffs")),
pl.when(pl.col("vars").is_not_null()).then(col_labels[0]),
pl.when(pl.col("vars").is_not_null()).then(col_labels[1]),
col_labels[0],
col_labels[1],
pl.when(pl.col("is_last_in_group")).then(pl.lit("\n")),
pl.when(pl.col("is_last_in_group")).then(pl.col("sign")),

@@ -499,7 +470,3 @@ pl.when(pl.col("is_last_in_group")).then(pl.lit(" ")),

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -506,0 +473,0 @@ # in the future, we could use lazy dataframes when they support appending

@@ -137,2 +137,3 @@ """

"_force_dim_names",
"_auto_mask",
"_solver_dir",

@@ -149,2 +150,3 @@ "solver_model",

force_dim_names: bool = False,
auto_mask: bool = False,
) -> None:

@@ -169,2 +171,6 @@ """

may become safer.
auto_mask : bool
Whether to automatically mask variables and constraints where
bounds, coefficients, or RHS values contain NaN. The default is
False.

@@ -190,2 +196,3 @@ Returns

self._force_dim_names: bool = bool(force_dim_names)
self._auto_mask: bool = bool(auto_mask)
self._solver_dir: Path = Path(

@@ -322,2 +329,14 @@ gettempdir() if solver_dir is None else solver_dir

@property
def auto_mask(self) -> bool:
"""
If True, automatically mask variables and constraints where bounds,
coefficients, or RHS values contain NaN.
"""
return self._auto_mask
@auto_mask.setter
def auto_mask(self, value: bool) -> None:
self._auto_mask = bool(value)
@property
def solver_dir(self) -> Path:

@@ -349,2 +368,3 @@ """

"force_dim_names",
"auto_mask",
]

@@ -541,2 +561,15 @@

# Auto-mask based on NaN in bounds (use numpy for speed)
if self.auto_mask:
auto_mask_values = ~np.isnan(data.lower.values) & ~np.isnan(
data.upper.values
)
auto_mask_arr = DataArray(
auto_mask_values, coords=data.coords, dims=data.dims
)
if mask is not None:
mask = mask & auto_mask_arr
else:
mask = auto_mask_arr
start = self._xCounter

@@ -548,3 +581,4 @@ end = start + data.labels.size

if mask is not None:
data.labels.values = data.labels.where(mask, -1).values
# Use numpy where for speed (38x faster than xarray where)
data.labels.values = np.where(mask.values, data.labels.values, -1)

@@ -667,2 +701,10 @@ data = data.assign_attrs(

# Capture original RHS for auto-masking before constraint creation
# (NaN values in RHS are lost during constraint creation)
# Use numpy for speed instead of xarray's notnull()
original_rhs_mask = None
if self.auto_mask and rhs is not None:
rhs_da = as_dataarray(rhs)
original_rhs_mask = (rhs_da.coords, rhs_da.dims, ~np.isnan(rhs_da.values))
if isinstance(lhs, LinearExpression):

@@ -720,3 +762,29 @@ if sign is None or rhs is None:

)
# Broadcast mask to match data shape for correct numpy where behavior
if mask.shape != data.labels.shape:
mask, _ = xr.broadcast(mask, data.labels)
# Auto-mask based on null expressions or NaN RHS (use numpy for speed)
if self.auto_mask:
# Check if expression is null: all vars == -1
# Use max() instead of all() - if max == -1, all are -1 (since valid vars >= 0)
# This is ~30% faster for large term dimensions
vars_all_invalid = data.vars.values.max(axis=-1) == -1
auto_mask_values = ~vars_all_invalid
if original_rhs_mask is not None:
coords, dims, rhs_notnull = original_rhs_mask
# Broadcast RHS mask to match data shape if needed
if rhs_notnull.shape != auto_mask_values.shape:
rhs_da = DataArray(rhs_notnull, coords=coords, dims=dims)
rhs_da, _ = xr.broadcast(rhs_da, data.labels)
rhs_notnull = rhs_da.values
auto_mask_values = auto_mask_values & rhs_notnull
auto_mask_arr = DataArray(
auto_mask_values, coords=data.labels.coords, dims=data.labels.dims
)
if mask is not None:
mask = mask & auto_mask_arr
else:
mask = auto_mask_arr
self.check_force_dim_names(data)

@@ -730,3 +798,4 @@

if mask is not None:
data.labels.values = data.labels.where(mask, -1).values
# Use numpy where for speed (38x faster than xarray where)
data.labels.values = np.where(mask.values, data.labels.values, -1)

@@ -733,0 +802,0 @@ data = data.assign_attrs(label_range=(start, end), name=name)

@@ -17,2 +17,3 @@ """

Any,
cast,
overload,

@@ -424,3 +425,5 @@ )

expr = self.to_linexpr()
return expr._multiply_by_linear_expression(expr)
return cast(
"QuadraticExpression", expr._multiply_by_linear_expression(expr)
)
raise ValueError("Can only raise to the power of 2")

@@ -427,0 +430,0 @@

@@ -31,5 +31,5 @@ # file generated by setuptools-scm

__version__ = version = '0.6.1'
__version_tuple__ = version_tuple = (0, 6, 1)
__version__ = version = '0.6.2'
__version_tuple__ = version_tuple = (0, 6, 2)
__commit_id__ = commit_id = 'g67e3484dc'
__commit_id__ = commit_id = 'gc9f83bbd3'
Metadata-Version: 2.4
Name: linopy
Version: 0.6.1
Version: 0.6.2
Summary: Linear optimization with N-D labeled arrays in Python

@@ -30,3 +30,3 @@ Author-email: Fabian Hofmann <fabianmarikhofmann@gmail.com>

Requires-Dist: dask>=0.18.0
Requires-Dist: polars
Requires-Dist: polars>=1.31
Requires-Dist: tqdm

@@ -43,2 +43,3 @@ Requires-Dist: deprecation

Requires-Dist: sphinx_book_theme==1.1.3; extra == "docs"
Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
Requires-Dist: nbsphinx==0.9.4; extra == "docs"

@@ -45,0 +46,0 @@ Requires-Dist: nbsphinx-link==1.3.0; extra == "docs"

@@ -36,3 +36,3 @@ [build-system]

"dask>=0.18.0",
"polars",
"polars>=1.31",
"tqdm",

@@ -56,2 +56,3 @@ "deprecation",

"sphinx_book_theme==1.1.3",
"sphinx-copybutton==0.5.2",
"nbsphinx==0.9.4",

@@ -58,0 +59,0 @@ "nbsphinx-link==1.3.0",

@@ -26,2 +26,3 @@ #!/usr/bin/env python3

iterate_slices,
maybe_group_terms_polars,
)

@@ -741,1 +742,18 @@ from linopy.testing import assert_linequal, assert_varequal

assert is_constant(cv)
def test_maybe_group_terms_polars_no_duplicates() -> None:
    """Fast path: distinct (labels, vars) pairs skip group_by."""
    frame = pl.DataFrame(
        {"labels": [0, 0], "vars": [1, 2], "coeffs": [3.0, 4.0]}
    )
    grouped = maybe_group_terms_polars(frame)
    # No pair repeats, so nothing may be aggregated: shape, column
    # order and coefficient values must all survive untouched.
    assert grouped.shape == (2, 3)
    assert grouped.columns == ["labels", "vars", "coeffs"]
    assert grouped["coeffs"].to_list() == [3.0, 4.0]
def test_maybe_group_terms_polars_with_duplicates() -> None:
    """Slow path: duplicate (labels, vars) pairs trigger group_by."""
    frame = pl.DataFrame(
        {"labels": [0, 0], "vars": [1, 1], "coeffs": [3.0, 4.0]}
    )
    grouped = maybe_group_terms_polars(frame)
    # The two identical (labels, vars) rows collapse into one,
    # with their coefficients summed (3.0 + 4.0 == 7.0).
    assert grouped.shape == (1, 3)
    assert grouped["coeffs"].to_list() == [7.0]

@@ -440,2 +440,16 @@ #!/usr/bin/env python3

def test_constraint_to_polars_mixed_signs(m: Model, x: linopy.Variable) -> None:
    """Test to_polars when a constraint has mixed sign values across dims."""
    # Build a plain constraint first, then overwrite its sign array in place
    # so the container holds more than one operator.
    m.add_constraints(x >= 0, name="mixed")
    con = m.constraints["mixed"]
    size = con.data.sizes["first"]
    # Alternate "<=" / ">=" along the first dimension (even indices get "<=").
    pattern = (["<=", ">="] * size)[:size]
    con.data["sign"] = xr.DataArray(np.array(pattern), dims=con.data["sign"].dims)
    frame = con.to_polars()
    assert isinstance(frame, pl.DataFrame)
    assert set(frame["sign"].to_list()) == {"<=", ">="}
def test_constraint_assignment_with_anonymous_constraints(

@@ -442,0 +456,0 @@ m: Model, x: linopy.Variable, y: linopy.Variable

@@ -339,1 +339,38 @@ #!/usr/bin/env python3

gurobipy.read(str(fn))
def test_to_file_lp_same_sign_constraints(tmp_path: Path) -> None:
    """Test LP writing when all constraints have the same sign operator."""
    model = Model()
    idx = np.arange(5)
    var = model.add_variables(coords=[idx], name="x")
    # Two upper bounds, so every row in the file uses "<=" only.
    model.add_constraints(var <= 10, name="upper")
    model.add_constraints(var <= 20, name="upper2")
    model.add_objective(var.sum())
    target = tmp_path / "same_sign.lp"
    model.to_file(target)
    text = target.read_text()
    assert "s.t." in text
    assert "<=" in text
def test_to_file_lp_mixed_sign_constraints(tmp_path: Path) -> None:
    """Test LP writing when constraints have different sign operators."""
    model = Model()
    idx = np.arange(5)
    var = model.add_variables(coords=[idx], name="x")
    # One constraint per operator so the writer has to emit all three.
    model.add_constraints(var <= 10, name="upper")
    model.add_constraints(var >= 1, name="lower")
    model.add_constraints(2 * var == 8, name="eq")
    model.add_objective(var.sum())
    target = tmp_path / "mixed_sign.lp"
    model.to_file(target)
    text = target.read_text()
    assert "s.t." in text
    assert "<=" in text
    assert ">=" in text
    assert "=" in text

@@ -1316,1 +1316,87 @@ #!/usr/bin/env python3

)
def test_constant_only_expression_mul_dataarray(m: Model) -> None:
    """A variable-free LinearExpression times a DataArray stays constant-only."""
    base = xr.DataArray([2, 3], dims=["dim_0"])
    factor = xr.DataArray([10, 20], dims=["dim_0"])
    expr = LinearExpression(base, m)
    assert expr.is_constant
    assert expr.nterm == 0
    want = base * factor
    # Multiplication must commute: same result from either side.
    for product in (expr * factor, factor * expr):
        assert isinstance(product, LinearExpression)
        assert product.is_constant
        assert (product.const == want).all()
def test_constant_only_expression_mul_linexpr_with_vars(m: Model, x: Variable) -> None:
    """Constant-only expression times an affine expression (1*x + 5), both orders."""
    base = xr.DataArray([2, 3], dims=["dim_0"])
    const_only = LinearExpression(base, m)
    assert const_only.is_constant
    assert const_only.nterm == 0
    affine = 1 * x + 5
    # The constant scales both the variable coefficients and the offset.
    want_coeffs = base
    want_const = base * 5
    for product in (const_only * affine, affine * const_only):
        assert isinstance(product, LinearExpression)
        assert (product.coeffs == want_coeffs).all()
        assert (product.const == want_const).all()
def test_constant_only_expression_mul_constant_only(m: Model) -> None:
    """Product of two constant-only expressions is again constant-only."""
    left_data = xr.DataArray([2, 3], dims=["dim_0"])
    right_data = xr.DataArray([4, 5], dims=["dim_0"])
    left = LinearExpression(left_data, m)
    right = LinearExpression(right_data, m)
    assert left.is_constant
    assert right.is_constant
    want = left_data * right_data
    # Commutativity check: both orderings give the elementwise product.
    for product in (left * right, right * left):
        assert isinstance(product, LinearExpression)
        assert product.is_constant
        assert (product.const == want).all()
def test_constant_only_expression_mul_linexpr_with_vars_and_const(
    m: Model, x: Variable
) -> None:
    """Constant-only expression times (4*x + 10) scales coeffs and offset alike."""
    base = xr.DataArray([2, 3], dims=["dim_0"])
    const_only = LinearExpression(base, m)
    assert const_only.is_constant
    affine = 4 * x + 10
    want_coeffs = base * 4
    want_const = base * 10
    # The result carries variables, so it is no longer constant-only.
    for product in (const_only * affine, affine * const_only):
        assert isinstance(product, LinearExpression)
        assert not product.is_constant
        assert (product.coeffs == want_coeffs).all()
        assert (product.const == want_const).all()

@@ -1094,2 +1094,66 @@ #!/usr/bin/env python3

@pytest.fixture
def auto_mask_variable_model() -> Model:
    """Model with auto_mask=True and NaN in variable bounds."""
    model = Model(auto_mask=True)
    x = model.add_variables(lower=0, coords=[range(10)], name="x")
    # The last two entries carry NaN lower bounds; with auto_mask=True the
    # corresponding "y" entries are masked out of the model.
    nan_lower = pd.Series([0.0] * 8 + [np.nan] * 2, range(10))
    y = model.add_variables(lower=nan_lower, name="y")
    model.add_constraints(x + y, GREATER_EQUAL, 10)
    model.add_constraints(y, GREATER_EQUAL, 0)
    model.add_objective(2 * x + y)
    return model
@pytest.fixture
def auto_mask_constraint_model() -> Model:
    """Model with auto_mask=True and NaN in constraint RHS."""
    model = Model(auto_mask=True)
    x = model.add_variables(lower=0, coords=[range(10)], name="x")
    y = model.add_variables(lower=0, coords=[range(10)], name="y")
    # NaN right-hand sides in the last two rows; with auto_mask=True those
    # constraint rows are masked, leaving only the "... >= 5" fallback there.
    rhs = pd.Series([10.0] * 8 + [np.nan] * 2, range(10))
    model.add_constraints(x + y, GREATER_EQUAL, rhs)
    model.add_constraints(x + y, GREATER_EQUAL, 5)
    model.add_objective(2 * x + y)
    return model
@pytest.mark.parametrize("solver,io_api,explicit_coordinate_names", params)
def test_auto_mask_variable_model(
    auto_mask_variable_model: Model,
    solver: str,
    io_api: str,
    explicit_coordinate_names: bool,
) -> None:
    """Test that auto_mask=True correctly masks variables with NaN bounds."""
    model = auto_mask_variable_model
    model.solve(
        solver, io_api=io_api, explicit_coordinate_names=explicit_coordinate_names
    )
    solution = model.variables.y.solution
    # Mirrors test_masked_variable_model: masked entries get no solution
    # value, while every unmasked entry does.
    assert solution[-2:].isnull().all()
    assert solution[:-2].notnull().all()
@pytest.mark.parametrize("solver,io_api,explicit_coordinate_names", params)
def test_auto_mask_constraint_model(
    auto_mask_constraint_model: Model,
    solver: str,
    io_api: str,
    explicit_coordinate_names: bool,
) -> None:
    """Test that auto_mask=True correctly masks constraints with NaN RHS."""
    model = auto_mask_constraint_model
    model.solve(
        solver, io_api=io_api, explicit_coordinate_names=explicit_coordinate_names
    )
    y = model.solution.y
    # Mirrors test_masked_constraint_model: unmasked rows bind at the 10-RHS
    # constraint, masked rows fall back to the ">= 5" constraint.
    assert (y[:-2] == 10).all()
    assert (y[-2:] == 5).all()
# def init_model_large():

@@ -1096,0 +1160,0 @@ # m = Model()

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display