You're Invited: Meet the Socket Team at RSAC and BSidesSF 2026, March 23–26. RSVP
Socket
Book a DemoSign in
Socket

linopy

Package Overview
Dependencies
Maintainers
2
Versions
64
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

linopy - pypi Package Compare versions

Comparing version
0.6.1
to
0.6.2
+639
benchmark/benchmark_auto_mask.py
#!/usr/bin/env python3
"""
Benchmark comparing manual masking vs auto_mask for models with NaN coefficients.
This creates a realistic scenario: a multi-period dispatch model where:
- Not all generators are available in all time periods (NaN in capacity bounds)
- Not all transmission lines exist between all regions (NaN in flow limits)
"""
import sys
from pathlib import Path
# Ensure we use the local linopy installation
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
import time # noqa: E402
from typing import Any # noqa: E402
import numpy as np # noqa: E402
import pandas as pd # noqa: E402
from linopy import GREATER_EQUAL, Model # noqa: E402
def create_nan_data(
    n_generators: int = 500,
    n_periods: int = 100,
    n_regions: int = 20,
    nan_fraction_gen: float = 0.3,
    nan_fraction_lines: float = 0.7,
    seed: int = 42,
) -> dict[str, Any]:
    """Create realistic benchmark input data containing NaN patterns.

    NaN entries encode sparsity: a NaN generator capacity means the
    generator is unavailable in that period (maintenance etc.), and a
    NaN line capacity means no direct transmission line exists between
    the two regions.

    Parameters
    ----------
    n_generators, n_periods, n_regions : int
        Problem dimensions.
    nan_fraction_gen : float
        Expected fraction of generator-period combinations set to NaN.
    nan_fraction_lines : float
        Expected fraction of region pairs without a line (kept NaN).
    seed : int
        Seed for the random generator, making the data reproducible.

    Returns
    -------
    dict[str, Any]
        Indexes, capacities, costs, demand and line capacities keyed by
        name, ready for the ``build_model_*`` functions.
    """
    random_state = np.random.default_rng(seed)

    gen_idx = pd.Index(range(n_generators), name="generator")
    period_idx = pd.Index(range(n_periods), name="period")
    region_idx = pd.Index(range(n_regions), name="region")

    # Per-generator, per-period capacity; selected entries become NaN below.
    gen_capacity = pd.DataFrame(
        random_state.uniform(50, 500, size=(n_generators, n_periods)),
        index=gen_idx,
        columns=period_idx,
    )
    # Mark a random subset of generator-period slots as unavailable.
    unavailable = random_state.random((n_generators, n_periods)) < nan_fraction_gen
    gen_capacity.values[unavailable] = np.nan

    gen_cost = pd.Series(random_state.uniform(10, 100, n_generators), index=gen_idx)
    gen_region = pd.Series(
        random_state.integers(0, n_regions, n_generators), index=gen_idx
    )

    demand = pd.DataFrame(
        random_state.uniform(100, 1000, size=(n_regions, n_periods)),
        index=region_idx,
        columns=period_idx,
    )

    # Sparse transmission network. Distinct dimension names sidestep
    # xarray's duplicate-dimension restriction; start fully NaN (= no line).
    regions_from = pd.Index(range(n_regions), name="region_from")
    regions_to = pd.Index(range(n_regions), name="region_to")
    line_capacity = pd.DataFrame(
        np.nan, index=regions_from, columns=regions_to, dtype=float
    )
    # Randomly connect a subset of directed region pairs (no self-loops).
    for src in range(n_regions):
        for dst in range(n_regions):
            if src != dst and random_state.random() > nan_fraction_lines:
                line_capacity.loc[src, dst] = random_state.uniform(100, 500)

    return {
        "generators": gen_idx,
        "periods": period_idx,
        "regions": region_idx,
        "regions_from": regions_from,
        "regions_to": regions_to,
        "gen_capacity": gen_capacity,
        "gen_cost": gen_cost,
        "gen_region": gen_region,
        "demand": demand,
        "line_capacity": line_capacity,
    }
def build_model_manual_mask(data: dict[str, Any]) -> Model:
    """Build the dispatch model with explicitly constructed masks.

    Represents the traditional workflow: the caller derives a boolean
    mask from the NaN pattern of the input data and passes it to
    ``add_variables`` alongside the bounds.
    """
    m = Model()

    gen_capacity = data["gen_capacity"]
    line_capacity = data["line_capacity"]

    # Dispatch variables: mask out generator-period slots with NaN capacity.
    dispatch = m.add_variables(
        lower=0,
        upper=gen_capacity,
        coords=[data["generators"], data["periods"]],
        name="dispatch",
        mask=gen_capacity.notnull(),
    )

    # Flow variables: mask out region pairs without a transmission line.
    flow = m.add_variables(
        lower=-line_capacity.abs(),
        upper=line_capacity.abs(),
        coords=[data["regions_from"], data["regions_to"]],
        name="flow",
        mask=line_capacity.notnull(),
    )

    gen_region = data["gen_region"]
    demand = data["demand"]
    # Per-region energy balance (vectorized over periods): local generation
    # plus net imports must cover demand.
    for r in data["regions"]:
        local_gens = data["generators"][gen_region == r]
        generation = dispatch.loc[local_gens, :].sum("generator")
        imports = flow.loc[:, r].sum("region_from")
        exports = flow.loc[r, :].sum("region_to")
        m.add_constraints(
            generation + imports - exports,
            GREATER_EQUAL,
            demand.loc[r],
            name=f"balance_r{r}",
        )

    # Minimize total generation cost.
    m.add_objective((dispatch * data["gen_cost"]).sum())
    return m
def build_model_auto_mask(data: dict[str, Any]) -> Model:
    """Build the dispatch model relying on ``auto_mask=True``.

    Same structure as the manual-mask variant, but NaN entries in the
    bounds are masked automatically by the model — no explicit mask
    arrays are constructed here.
    """
    m = Model(auto_mask=True)

    gen_capacity = data["gen_capacity"]
    line_capacity = data["line_capacity"]

    # Dispatch variables: NaN capacities are auto-masked by the model.
    dispatch = m.add_variables(
        lower=0,
        upper=gen_capacity,
        coords=[data["generators"], data["periods"]],
        name="dispatch",
    )

    # Flow variables: NaN line capacities are auto-masked by the model.
    flow = m.add_variables(
        lower=-line_capacity.abs(),
        upper=line_capacity.abs(),
        coords=[data["regions_from"], data["regions_to"]],
        name="flow",
    )

    gen_region = data["gen_region"]
    demand = data["demand"]
    # Per-region energy balance (vectorized over periods): local generation
    # plus net imports must cover demand.
    for r in data["regions"]:
        local_gens = data["generators"][gen_region == r]
        generation = dispatch.loc[local_gens, :].sum("generator")
        imports = flow.loc[:, r].sum("region_from")
        exports = flow.loc[r, :].sum("region_to")
        m.add_constraints(
            generation + imports - exports,
            GREATER_EQUAL,
            demand.loc[r],
            name=f"balance_r{r}",
        )

    # Minimize total generation cost.
    m.add_objective((dispatch * data["gen_cost"]).sum())
    return m
def build_model_no_mask(data: dict[str, Any]) -> Model:
    """Build the dispatch model WITHOUT any masking.

    NaN values remain in the variable bounds; this serves as the
    baseline for measuring the overhead that masking adds.
    """
    m = Model()

    gen_capacity = data["gen_capacity"]
    line_capacity = data["line_capacity"]

    # Dispatch variables: NaN bounds are deliberately left in place.
    dispatch = m.add_variables(
        lower=0,
        upper=gen_capacity,
        coords=[data["generators"], data["periods"]],
        name="dispatch",
    )

    # Flow variables: NaN bounds are deliberately left in place.
    flow = m.add_variables(
        lower=-line_capacity.abs(),
        upper=line_capacity.abs(),
        coords=[data["regions_from"], data["regions_to"]],
        name="flow",
    )

    gen_region = data["gen_region"]
    demand = data["demand"]
    # Per-region energy balance (vectorized over periods): local generation
    # plus net imports must cover demand.
    for r in data["regions"]:
        local_gens = data["generators"][gen_region == r]
        generation = dispatch.loc[local_gens, :].sum("generator")
        imports = flow.loc[:, r].sum("region_from")
        exports = flow.loc[r, :].sum("region_to")
        m.add_constraints(
            generation + imports - exports,
            GREATER_EQUAL,
            demand.loc[r],
            name=f"balance_r{r}",
        )

    # Minimize total generation cost.
    m.add_objective((dispatch * data["gen_cost"]).sum())
    return m
def benchmark(
    n_generators: int = 500,
    n_periods: int = 100,
    n_regions: int = 20,
    n_runs: int = 3,
    solve: bool = True,
) -> dict[str, Any]:
    """Run benchmark comparing no masking, manual masking, and auto masking.

    Builds the same dispatch model three ways (no mask as baseline,
    manual mask, auto mask), timing each build ``n_runs`` times; then
    compares LP-file write time and size, and — if ``solve`` is True —
    solve time and objective values via HiGHS.

    Parameters
    ----------
    n_generators, n_periods, n_regions : int
        Problem dimensions forwarded to ``create_nan_data``.
    n_runs : int
        Number of timed model-build repetitions per variant.
    solve : bool
        Whether to additionally solve the manual/auto models once each.

    Returns
    -------
    dict[str, Any]
        Timings and model sizes, consumed by ``print_summary_table``.
    """
    print("=" * 70)
    print("BENCHMARK: No Masking vs Manual Masking vs Auto-Masking")
    print("=" * 70)
    print("\nModel size:")
    print(f" - Generators: {n_generators}")
    print(f" - Time periods: {n_periods}")
    print(f" - Regions: {n_regions}")
    print(f" - Potential dispatch vars: {n_generators * n_periods:,}")
    print(f" - Potential flow vars: {n_regions * n_regions:,}")
    print(f"\nRunning {n_runs} iterations each...\n")
    # Generate data once so all three variants see identical input.
    data = create_nan_data(
        n_generators=n_generators,
        n_periods=n_periods,
        n_regions=n_regions,
    )
    # Count NaN entries to report the sparsity of the generated data.
    gen_nan_count = data["gen_capacity"].isna().sum().sum()
    gen_total = data["gen_capacity"].size
    line_nan_count = data["line_capacity"].isna().sum().sum()
    line_total = data["line_capacity"].size
    print("NaN statistics:")
    print(
        f" - Generator capacity: {gen_nan_count:,}/{gen_total:,} "
        f"({100 * gen_nan_count / gen_total:.1f}% NaN)"
    )
    print(
        f" - Line capacity: {line_nan_count:,}/{line_total:,} "
        f"({100 * line_nan_count / line_total:.1f}% NaN)"
    )
    print()
    # Benchmark NO masking (baseline)
    no_mask_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m_no_mask = build_model_no_mask(data)
        elapsed = time.perf_counter() - start
        no_mask_times.append(elapsed)
        if i == 0:
            # Can't use nvars directly as it will fail with NaN values
            # Instead count total variable labels (including those with NaN bounds)
            no_mask_nvars = sum(
                m_no_mask.variables[k].labels.size for k in m_no_mask.variables
            )
            no_mask_ncons = m_no_mask.ncons
    # Benchmark manual masking
    manual_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m_manual = build_model_manual_mask(data)
        elapsed = time.perf_counter() - start
        manual_times.append(elapsed)
        if i == 0:
            manual_nvars = m_manual.nvars
            manual_ncons = m_manual.ncons
    # Benchmark auto masking
    auto_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m_auto = build_model_auto_mask(data)
        elapsed = time.perf_counter() - start
        auto_times.append(elapsed)
        if i == 0:
            auto_nvars = m_auto.nvars
            auto_ncons = m_auto.ncons
    # Results
    print("-" * 70)
    print("RESULTS: Model Building Time")
    print("-" * 70)
    print("\nNo masking (baseline):")
    print(f" - Mean time: {np.mean(no_mask_times):.3f}s")
    print(f" - Variables: {no_mask_nvars:,} (includes NaN-bounded vars)")
    print(f" - Constraints: {no_mask_ncons:,}")
    print("\nManual masking:")
    print(f" - Mean time: {np.mean(manual_times):.3f}s")
    print(f" - Variables: {manual_nvars:,}")
    print(f" - Constraints: {manual_ncons:,}")
    manual_overhead = np.mean(manual_times) - np.mean(no_mask_times)
    print(f" - Overhead vs no-mask: {manual_overhead * 1000:+.1f}ms")
    print("\nAuto masking:")
    print(f" - Mean time: {np.mean(auto_times):.3f}s")
    print(f" - Variables: {auto_nvars:,}")
    print(f" - Constraints: {auto_ncons:,}")
    auto_overhead = np.mean(auto_times) - np.mean(no_mask_times)
    print(f" - Overhead vs no-mask: {auto_overhead * 1000:+.1f}ms")
    # Comparison
    print("\nComparison (Auto vs Manual):")
    speedup = np.mean(manual_times) / np.mean(auto_times)
    diff = np.mean(auto_times) - np.mean(manual_times)
    if speedup > 1:
        print(f" - Auto-mask is {speedup:.2f}x FASTER than manual")
    else:
        print(f" - Auto-mask is {1 / speedup:.2f}x SLOWER than manual")
    print(f" - Time difference: {diff * 1000:+.1f}ms")
    # Verify models are equivalent
    print("\nVerification:")
    print(f" - Manual == Auto variables: {manual_nvars == auto_nvars}")
    print(f" - Manual == Auto constraints: {manual_ncons == auto_ncons}")
    print(f" - Variables masked out: {no_mask_nvars - manual_nvars:,}")
    results = {
        "n_generators": n_generators,
        "n_periods": n_periods,
        "potential_vars": n_generators * n_periods,
        "no_mask_time": np.mean(no_mask_times),
        "manual_time": np.mean(manual_times),
        "auto_time": np.mean(auto_times),
        "nvars": manual_nvars,
        "masked_out": no_mask_nvars - manual_nvars,
    }
    # LP file write benchmark
    print("\n" + "-" * 70)
    print("RESULTS: LP File Write Time & Size")
    print("-" * 70)
    import os
    import tempfile
    # Write LP file for manual masked model.
    # delete=False so the path survives the context manager; unlinked below.
    with tempfile.NamedTemporaryFile(suffix=".lp", delete=False) as f:
        manual_lp_path = f.name
    start = time.perf_counter()
    m_manual.to_file(manual_lp_path)
    manual_write_time = time.perf_counter() - start
    manual_lp_size = os.path.getsize(manual_lp_path) / (1024 * 1024)  # MB
    os.unlink(manual_lp_path)
    # Write LP file for auto masked model
    with tempfile.NamedTemporaryFile(suffix=".lp", delete=False) as f:
        auto_lp_path = f.name
    start = time.perf_counter()
    m_auto.to_file(auto_lp_path)
    auto_write_time = time.perf_counter() - start
    auto_lp_size = os.path.getsize(auto_lp_path) / (1024 * 1024)  # MB
    os.unlink(auto_lp_path)
    print("\nManual masking:")
    print(f" - Write time: {manual_write_time:.3f}s")
    print(f" - File size: {manual_lp_size:.2f} MB")
    print("\nAuto masking:")
    print(f" - Write time: {auto_write_time:.3f}s")
    print(f" - File size: {auto_lp_size:.2f} MB")
    # NOTE(review): compares file *sizes* within 0.01 MB, not contents —
    # "identical" here is a size-based proxy, not a byte comparison.
    print(f"\nFiles identical: {abs(manual_lp_size - auto_lp_size) < 0.01}")
    results["manual_write_time"] = manual_write_time
    results["auto_write_time"] = auto_write_time
    results["lp_size_mb"] = manual_lp_size
    # Quick solve comparison (single run each; requires HiGHS installed)
    if solve:
        print("\n" + "-" * 70)
        print("RESULTS: Solve Time (single run)")
        print("-" * 70)
        start = time.perf_counter()
        m_manual.solve("highs", io_api="direct")
        manual_solve = time.perf_counter() - start
        start = time.perf_counter()
        m_auto.solve("highs", io_api="direct")
        auto_solve = time.perf_counter() - start
        print(f"\nManual masking solve: {manual_solve:.3f}s")
        print(f"Auto masking solve: {auto_solve:.3f}s")
        if m_manual.objective.value is not None and m_auto.objective.value is not None:
            print(
                f"Objective values match: "
                f"{np.isclose(m_manual.objective.value, m_auto.objective.value)}"
            )
            print(f" - Manual: {m_manual.objective.value:.2f}")
            print(f" - Auto: {m_auto.objective.value:.2f}")
    return results
def benchmark_code_simplicity() -> None:
    """Print a qualitative, side-by-side comparison of the two styles.

    No timing here — this only illustrates the boilerplate that
    ``auto_mask`` removes.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("CODE COMPARISON: Manual vs Auto-Mask")
    print(banner)

    snippet_manual = """
# Manual masking - must create mask explicitly
gen_mask = gen_capacity.notnull()
dispatch = m.add_variables(
lower=0,
upper=gen_capacity,
coords=[generators, periods],
name="dispatch",
mask=gen_mask, # Extra step required
)
"""
    snippet_auto = """
# Auto masking - just pass the data with NaN
m = Model(auto_mask=True)
dispatch = m.add_variables(
lower=0,
upper=gen_capacity, # NaN auto-masked
coords=[generators, periods],
name="dispatch",
)
"""
    print("\nManual masking approach:")
    print(snippet_manual)
    print("Auto-mask approach:")
    print(snippet_auto)
    print("Benefits of auto_mask:")
    for benefit in (
        " - Less boilerplate code",
        " - No need to manually track which arrays have NaN",
        " - Reduces risk of forgetting to mask",
        " - Cleaner, more declarative style",
    ):
        print(benefit)
def benchmark_constraint_masking(n_runs: int = 3) -> None:
    """Benchmark auto-masking of constraints with NaN in RHS.

    Builds a dense synthetic constraint block where ~30% of RHS values
    are NaN and compares construction time between supplying the mask
    manually and letting ``Model(auto_mask=True)`` derive it.

    Parameters
    ----------
    n_runs : int
        Number of timed repetitions per variant.
    """
    print("\n" + "=" * 70)
    print("BENCHMARK: Constraint Auto-Masking (NaN in RHS)")
    print("=" * 70)
    # Fixed problem dimensions and NaN density for the synthetic RHS.
    n_vars = 1000
    n_constraints = 5000
    nan_fraction = 0.3
    rng = np.random.default_rng(42)
    idx = pd.Index(range(n_vars), name="i")
    con_idx = pd.Index(range(n_constraints), name="c")
    # Create RHS with NaN values
    rhs = pd.Series(rng.uniform(1, 100, n_constraints), index=con_idx)
    nan_mask = rng.random(n_constraints) < nan_fraction
    rhs.values[nan_mask] = np.nan
    print("\nModel size:")
    print(f" - Variables: {n_vars}")
    print(f" - Potential constraints: {n_constraints}")
    print(f" - NaN in RHS: {nan_mask.sum()} ({100 * nan_fraction:.0f}%)")
    print(f"\nRunning {n_runs} iterations each...\n")
    # Manual masking
    manual_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m = Model()
        x = m.add_variables(lower=0, coords=[idx], name="x")
        # Coefficient matrix is regenerated inside the loop so both
        # variants time the same amount of data construction.
        coeffs = pd.DataFrame(
            rng.uniform(0.1, 1, (n_constraints, n_vars)), index=con_idx, columns=idx
        )
        con_mask = rhs.notnull()  # Manual mask creation
        m.add_constraints((coeffs * x).sum("i"), GREATER_EQUAL, rhs, mask=con_mask)
        m.add_objective(x.sum())
        elapsed = time.perf_counter() - start
        manual_times.append(elapsed)
        if i == 0:
            manual_ncons = m.ncons
    # Auto masking
    auto_times = []
    for i in range(n_runs):
        start = time.perf_counter()
        m = Model(auto_mask=True)
        x = m.add_variables(lower=0, coords=[idx], name="x")
        coeffs = pd.DataFrame(
            rng.uniform(0.1, 1, (n_constraints, n_vars)), index=con_idx, columns=idx
        )
        m.add_constraints((coeffs * x).sum("i"), GREATER_EQUAL, rhs)  # No mask needed
        m.add_objective(x.sum())
        elapsed = time.perf_counter() - start
        auto_times.append(elapsed)
        if i == 0:
            auto_ncons = m.ncons
    print("-" * 70)
    print("RESULTS: Constraint Building Time")
    print("-" * 70)
    print("\nManual masking:")
    print(f" - Mean time: {np.mean(manual_times):.3f}s")
    print(f" - Active constraints: {manual_ncons:,}")
    print("\nAuto masking:")
    print(f" - Mean time: {np.mean(auto_times):.3f}s")
    print(f" - Active constraints: {auto_ncons:,}")
    overhead = np.mean(auto_times) - np.mean(manual_times)
    print(f"\nOverhead: {overhead * 1000:.1f}ms")
    print(f"Same constraint count: {manual_ncons == auto_ncons}")
def print_summary_table(results: list[dict[str, Any]]) -> None:
    """Render all collected benchmark results as one aligned text table.

    Parameters
    ----------
    results : list[dict[str, Any]]
        Dicts as returned by ``benchmark``; LP-write keys are optional
        and default to 0 when absent.
    """
    bar = "=" * 110
    rule = "-" * 110
    print("\n" + bar)
    print("SUMMARY TABLE: Model Building & LP Write Times")
    print(bar)
    header = (
        f"{'Model':<12} {'Pot.Vars':>10} {'Act.Vars':>10} {'Masked':>8} "
        f"{'No-Mask':>9} {'Manual':>9} {'Auto':>9} {'Diff':>8} "
        f"{'LP Write':>9} {'LP Size':>9}"
    )
    print(header)
    print(rule)
    for entry in results:
        label = f"{entry['n_generators']}x{entry['n_periods']}"
        write_ms = entry.get("manual_write_time", 0) * 1000
        size_mb = entry.get("lp_size_mb", 0)
        print(
            f"{label:<12} {entry['potential_vars']:>10,} {entry['nvars']:>10,} "
            f"{entry['masked_out']:>8,} {entry['no_mask_time'] * 1000:>8.0f}ms "
            f"{entry['manual_time'] * 1000:>8.0f}ms {entry['auto_time'] * 1000:>8.0f}ms "
            f"{(entry['auto_time'] - entry['manual_time']) * 1000:>+7.0f}ms "
            f"{write_ms:>8.0f}ms {size_mb:>8.1f}MB"
        )
    print(rule)
    for legend_line in (
        "Pot.Vars = Potential variables, Act.Vars = Active (non-masked) variables",
        "Masked = Variables masked out due to NaN bounds",
        "Diff = Auto-mask time minus Manual mask time (negative = faster)",
        "LP Write = Time to write LP file, LP Size = LP file size in MB",
    ):
        print(legend_line)
if __name__ == "__main__":
    # Collected per-size result dicts, summarized at the end.
    all_results = []
    # Run benchmarks with different sizes (solving disabled: only model
    # construction and LP writing are compared here).
    print("\n### SMALL MODEL ###")
    all_results.append(
        benchmark(n_generators=100, n_periods=50, n_regions=10, n_runs=5, solve=False)
    )
    print("\n\n### MEDIUM MODEL ###")
    all_results.append(
        benchmark(n_generators=500, n_periods=100, n_regions=20, n_runs=3, solve=False)
    )
    print("\n\n### LARGE MODEL ###")
    all_results.append(
        benchmark(n_generators=1000, n_periods=200, n_regions=30, n_runs=3, solve=False)
    )
    print("\n\n### VERY LARGE MODEL ###")
    all_results.append(
        benchmark(n_generators=2000, n_periods=500, n_regions=40, n_runs=3, solve=False)
    )
    print("\n\n### EXTRA LARGE MODEL ###")
    all_results.append(
        benchmark(n_generators=5000, n_periods=500, n_regions=50, n_runs=2, solve=False)
    )
    # Print summary table
    print_summary_table(all_results)
    # Run constraint benchmark
    benchmark_constraint_masking()
    # Show code comparison
    benchmark_code_simplicity()
+4
-0

@@ -45,4 +45,8 @@ # Configuration file for the Sphinx documentation builder.

"sphinx.ext.imgconverter", # for SVG conversion
"sphinx_copybutton",
]
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: "
copybutton_prompt_is_regexp = True
# Add any paths that contain templates here, relative to this directory.

@@ -49,0 +53,0 @@ templates_path = ["_templates"]

Release Notes
=============
.. Upcoming Version
Upcoming Version
----------------
Version 0.6.2
--------------
**Features**
* Add ``auto_mask`` parameter to ``Model`` class that automatically masks variables and constraints where bounds, coefficients, or RHS values contain NaN. This eliminates the need to manually create mask arrays when working with sparse or incomplete data.
**Performance**
* Speed up LP file writing by 2-2.7x on large models through Polars streaming engine, join-based constraint assembly, and reduced per-constraint overhead
**Bug Fixes**
* Fix multiplication of constant-only ``LinearExpression`` with other expressions
* Fix docs and Gurobi license handling
Version 0.6.1

@@ -7,0 +24,0 @@ --------------

+3
-2
Metadata-Version: 2.4
Name: linopy
Version: 0.6.1
Version: 0.6.2
Summary: Linear optimization with N-D labeled arrays in Python

@@ -30,3 +30,3 @@ Author-email: Fabian Hofmann <fabianmarikhofmann@gmail.com>

Requires-Dist: dask>=0.18.0
Requires-Dist: polars
Requires-Dist: polars>=1.31
Requires-Dist: tqdm

@@ -43,2 +43,3 @@ Requires-Dist: deprecation

Requires-Dist: sphinx_book_theme==1.1.3; extra == "docs"
Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
Requires-Dist: nbsphinx==0.9.4; extra == "docs"

@@ -45,0 +46,0 @@ Requires-Dist: nbsphinx-link==1.3.0; extra == "docs"

@@ -7,3 +7,3 @@ scipy

dask>=0.18.0
polars
polars>=1.31
tqdm

@@ -39,2 +39,3 @@ deprecation

sphinx_book_theme==1.1.3
sphinx-copybutton==0.5.2
nbsphinx==0.9.4

@@ -41,0 +42,0 @@ nbsphinx-link==1.3.0

@@ -28,2 +28,3 @@ .git-blame-ignore-revs

benchmark/benchmark-overhead.png
benchmark/benchmark_auto_mask.py
benchmark/benchmark_resource-absolute.pdf

@@ -30,0 +31,0 @@ benchmark/benchmark_resource-absolute.png

@@ -452,2 +452,21 @@ #!/usr/bin/env python3

def maybe_group_terms_polars(df: pl.DataFrame) -> pl.DataFrame:
    """
    Group terms only if there are duplicate (labels, vars) pairs.
    This avoids the expensive group_by operation when terms already
    reference distinct variables (e.g. ``x - y`` has ``_term=2`` but
    no duplicates). When skipping, columns are reordered to match the
    output of ``group_terms_polars``.
    """
    # All variable-label columns (more than one may exist, e.g. for
    # quadratic terms with "vars1"/"vars2"-style columns).
    varcols = [c for c in df.columns if c.startswith("vars")]
    keys = [c for c in ["labels"] + varcols if c in df.columns]
    # Cheap duplicate probe: one pass counting distinct key tuples.
    key_count = df.select(pl.struct(keys).n_unique()).item()
    if key_count < df.height:
        # Duplicates present -> fall back to the full aggregation.
        return group_terms_polars(df)
    # Match column order of group_terms (group-by keys, coeffs, rest)
    rest = [c for c in df.columns if c not in keys and c != "coeffs"]
    return df.select(keys + ["coeffs"] + rest)
def save_join(*dataarrays: DataArray, integer_dtype: bool = False) -> Dataset:

@@ -454,0 +473,0 @@ """

@@ -43,6 +43,5 @@ """

get_label_position,
group_terms_polars,
has_optimized_model,
infer_schema_polars,
iterate_slices,
maybe_group_terms_polars,
maybe_replace_signs,

@@ -626,17 +625,34 @@ print_coord,

long = filter_nulls_polars(long)
long = group_terms_polars(long)
if ds.sizes.get("_term", 1) > 1:
long = maybe_group_terms_polars(long)
check_has_nulls_polars(long, name=f"{self.type} {self.name}")
short_ds = ds[[k for k in ds if "_term" not in ds[k].dims]]
schema = infer_schema_polars(short_ds)
schema["sign"] = pl.Enum(["=", "<=", ">="])
short = to_polars(short_ds, schema=schema)
short = filter_nulls_polars(short)
check_has_nulls_polars(short, name=f"{self.type} {self.name}")
# Build short DataFrame (labels, rhs, sign) without xarray broadcast.
# Apply labels mask directly instead of filter_nulls_polars.
labels_flat = ds["labels"].values.reshape(-1)
mask = labels_flat != -1
labels_masked = labels_flat[mask]
rhs_flat = np.broadcast_to(ds["rhs"].values, ds["labels"].shape).reshape(-1)
df = pl.concat([short, long], how="diagonal_relaxed").sort(["labels", "rhs"])
# delete subsequent non-null rhs (happens if all vars per label are -1)
is_non_null = df["rhs"].is_not_null()
prev_non_is_null = is_non_null.shift(1).fill_null(False)
df = df.filter(is_non_null & ~prev_non_is_null | ~is_non_null)
sign_values = ds["sign"].values
sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1)
all_same_sign = len(sign_flat) > 0 and (
sign_flat[0] == sign_flat[-1] and (sign_flat[0] == sign_flat).all()
)
short_data: dict = {
"labels": labels_masked,
"rhs": rhs_flat[mask],
}
if all_same_sign:
short = pl.DataFrame(short_data).with_columns(
pl.lit(sign_flat[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign")
)
else:
short_data["sign"] = pl.Series(
"sign", sign_flat[mask], dtype=pl.Enum(["=", "<=", ">="])
)
short = pl.DataFrame(short_data)
df = long.join(short, on="labels", how="inner")
return df[["labels", "coeffs", "vars", "sign", "rhs"]]

@@ -643,0 +659,0 @@

@@ -57,2 +57,17 @@ #!/usr/bin/env python3

def _format_and_write(
    df: pl.DataFrame, columns: list[pl.Expr], f: BufferedWriter
) -> None:
    """
    Format columns via concat_str and write to file.
    Uses Polars streaming engine for better memory efficiency.
    """
    # Lazy select + streaming collect keeps peak memory low on large
    # frames; null_value="" plus ignore_nulls drops absent fragments.
    df.lazy().select(pl.concat_str(columns, ignore_nulls=True)).collect(
        engine="streaming"
    ).write_csv(
        f, separator=" ", null_value="", quote_style="never", include_header=False
    )
def signed_number(expr: pl.Expr) -> tuple[pl.Expr, pl.Expr]:

@@ -159,6 +174,3 @@ """

]
df = df.select(pl.concat_str(cols, ignore_nulls=True))
df.write_csv(
f, separator=" ", null_value="", quote_style="never", include_header=False
)
_format_and_write(df, cols, f)

@@ -176,6 +188,3 @@

f.write(b"+ [\n")
df = df.select(pl.concat_str(cols, ignore_nulls=True))
df.write_csv(
f, separator=" ", null_value="", quote_style="never", include_header=False
)
_format_and_write(df, cols, f)
f.write(b"] / 2\n")

@@ -260,7 +269,3 @@

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -303,7 +308,3 @@

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -347,7 +348,3 @@

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -408,7 +405,3 @@

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -450,27 +443,6 @@

# Ensure each constraint has both coefficient and RHS terms
analysis = df.group_by("labels").agg(
[
pl.col("coeffs").is_not_null().sum().alias("coeff_rows"),
pl.col("sign").is_not_null().sum().alias("rhs_rows"),
]
)
valid = analysis.filter(
(pl.col("coeff_rows") > 0) & (pl.col("rhs_rows") > 0)
)
if valid.height == 0:
continue
# Keep only constraints that have both parts
df = df.join(valid.select("labels"), on="labels", how="inner")
# Sort by labels and mark first/last occurrences
df = df.sort("labels").with_columns(
[
pl.when(pl.col("labels").is_first_distinct())
.then(pl.col("labels"))
.otherwise(pl.lit(None))
.alias("labels_first"),
pl.col("labels").is_first_distinct().alias("is_first_in_group"),
(pl.col("labels") != pl.col("labels").shift(-1))

@@ -482,13 +454,12 @@ .fill_null(True)

row_labels = print_constraint(pl.col("labels_first"))
row_labels = print_constraint(pl.col("labels"))
col_labels = print_variable(pl.col("vars"))
columns = [
pl.when(pl.col("labels_first").is_not_null()).then(row_labels[0]),
pl.when(pl.col("labels_first").is_not_null()).then(row_labels[1]),
pl.when(pl.col("labels_first").is_not_null())
.then(pl.lit(":\n"))
.alias(":"),
pl.when(pl.col("is_first_in_group")).then(row_labels[0]),
pl.when(pl.col("is_first_in_group")).then(row_labels[1]),
pl.when(pl.col("is_first_in_group")).then(pl.lit(":\n")).alias(":"),
*signed_number(pl.col("coeffs")),
pl.when(pl.col("vars").is_not_null()).then(col_labels[0]),
pl.when(pl.col("vars").is_not_null()).then(col_labels[1]),
col_labels[0],
col_labels[1],
pl.when(pl.col("is_last_in_group")).then(pl.lit("\n")),
pl.when(pl.col("is_last_in_group")).then(pl.col("sign")),

@@ -499,7 +470,3 @@ pl.when(pl.col("is_last_in_group")).then(pl.lit(" ")),

kwargs: Any = dict(
separator=" ", null_value="", quote_style="never", include_header=False
)
formatted = df.select(pl.concat_str(columns, ignore_nulls=True))
formatted.write_csv(f, **kwargs)
_format_and_write(df, columns, f)

@@ -506,0 +473,0 @@ # in the future, we could use lazy dataframes when they support appending

@@ -137,2 +137,3 @@ """

"_force_dim_names",
"_auto_mask",
"_solver_dir",

@@ -149,2 +150,3 @@ "solver_model",

force_dim_names: bool = False,
auto_mask: bool = False,
) -> None:

@@ -169,2 +171,6 @@ """

may become safer.
auto_mask : bool
Whether to automatically mask variables and constraints where
bounds, coefficients, or RHS values contain NaN. The default is
False.

@@ -190,2 +196,3 @@ Returns

self._force_dim_names: bool = bool(force_dim_names)
self._auto_mask: bool = bool(auto_mask)
self._solver_dir: Path = Path(

@@ -322,2 +329,14 @@ gettempdir() if solver_dir is None else solver_dir

@property
def auto_mask(self) -> bool:
"""
If True, automatically mask variables and constraints where bounds,
coefficients, or RHS values contain NaN.
"""
return self._auto_mask
@auto_mask.setter
def auto_mask(self, value: bool) -> None:
self._auto_mask = bool(value)
@property
def solver_dir(self) -> Path:

@@ -349,2 +368,3 @@ """

"force_dim_names",
"auto_mask",
]

@@ -541,2 +561,15 @@

# Auto-mask based on NaN in bounds (use numpy for speed)
if self.auto_mask:
auto_mask_values = ~np.isnan(data.lower.values) & ~np.isnan(
data.upper.values
)
auto_mask_arr = DataArray(
auto_mask_values, coords=data.coords, dims=data.dims
)
if mask is not None:
mask = mask & auto_mask_arr
else:
mask = auto_mask_arr
start = self._xCounter

@@ -548,3 +581,4 @@ end = start + data.labels.size

if mask is not None:
data.labels.values = data.labels.where(mask, -1).values
# Use numpy where for speed (38x faster than xarray where)
data.labels.values = np.where(mask.values, data.labels.values, -1)

@@ -667,2 +701,10 @@ data = data.assign_attrs(

# Capture original RHS for auto-masking before constraint creation
# (NaN values in RHS are lost during constraint creation)
# Use numpy for speed instead of xarray's notnull()
original_rhs_mask = None
if self.auto_mask and rhs is not None:
rhs_da = as_dataarray(rhs)
original_rhs_mask = (rhs_da.coords, rhs_da.dims, ~np.isnan(rhs_da.values))
if isinstance(lhs, LinearExpression):

@@ -720,3 +762,29 @@ if sign is None or rhs is None:

)
# Broadcast mask to match data shape for correct numpy where behavior
if mask.shape != data.labels.shape:
mask, _ = xr.broadcast(mask, data.labels)
# Auto-mask based on null expressions or NaN RHS (use numpy for speed)
if self.auto_mask:
# Check if expression is null: all vars == -1
# Use max() instead of all() - if max == -1, all are -1 (since valid vars >= 0)
# This is ~30% faster for large term dimensions
vars_all_invalid = data.vars.values.max(axis=-1) == -1
auto_mask_values = ~vars_all_invalid
if original_rhs_mask is not None:
coords, dims, rhs_notnull = original_rhs_mask
# Broadcast RHS mask to match data shape if needed
if rhs_notnull.shape != auto_mask_values.shape:
rhs_da = DataArray(rhs_notnull, coords=coords, dims=dims)
rhs_da, _ = xr.broadcast(rhs_da, data.labels)
rhs_notnull = rhs_da.values
auto_mask_values = auto_mask_values & rhs_notnull
auto_mask_arr = DataArray(
auto_mask_values, coords=data.labels.coords, dims=data.labels.dims
)
if mask is not None:
mask = mask & auto_mask_arr
else:
mask = auto_mask_arr
self.check_force_dim_names(data)

@@ -730,3 +798,4 @@

if mask is not None:
data.labels.values = data.labels.where(mask, -1).values
# Use numpy where for speed (38x faster than xarray where)
data.labels.values = np.where(mask.values, data.labels.values, -1)

@@ -733,0 +802,0 @@ data = data.assign_attrs(label_range=(start, end), name=name)

@@ -17,2 +17,3 @@ """

Any,
cast,
overload,

@@ -424,3 +425,5 @@ )

expr = self.to_linexpr()
return expr._multiply_by_linear_expression(expr)
return cast(
"QuadraticExpression", expr._multiply_by_linear_expression(expr)
)
raise ValueError("Can only raise to the power of 2")

@@ -427,0 +430,0 @@

@@ -31,5 +31,5 @@ # file generated by setuptools-scm

__version__ = version = '0.6.1'
__version_tuple__ = version_tuple = (0, 6, 1)
__version__ = version = '0.6.2'
__version_tuple__ = version_tuple = (0, 6, 2)
__commit_id__ = commit_id = 'g67e3484dc'
__commit_id__ = commit_id = 'gc9f83bbd3'
Metadata-Version: 2.4
Name: linopy
Version: 0.6.1
Version: 0.6.2
Summary: Linear optimization with N-D labeled arrays in Python

@@ -30,3 +30,3 @@ Author-email: Fabian Hofmann <fabianmarikhofmann@gmail.com>

Requires-Dist: dask>=0.18.0
Requires-Dist: polars
Requires-Dist: polars>=1.31
Requires-Dist: tqdm

@@ -43,2 +43,3 @@ Requires-Dist: deprecation

Requires-Dist: sphinx_book_theme==1.1.3; extra == "docs"
Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
Requires-Dist: nbsphinx==0.9.4; extra == "docs"

@@ -45,0 +46,0 @@ Requires-Dist: nbsphinx-link==1.3.0; extra == "docs"

@@ -36,3 +36,3 @@ [build-system]

"dask>=0.18.0",
"polars",
"polars>=1.31",
"tqdm",

@@ -56,2 +56,3 @@ "deprecation",

"sphinx_book_theme==1.1.3",
"sphinx-copybutton==0.5.2",
"nbsphinx==0.9.4",

@@ -58,0 +59,0 @@ "nbsphinx-link==1.3.0",

@@ -26,2 +26,3 @@ #!/usr/bin/env python3

iterate_slices,
maybe_group_terms_polars,
)

@@ -741,1 +742,18 @@ from linopy.testing import assert_linequal, assert_varequal

assert is_constant(cv)
def test_maybe_group_terms_polars_no_duplicates() -> None:
    """Fast path: distinct (labels, vars) pairs skip group_by."""
    frame = pl.DataFrame(
        {"labels": [0, 0], "vars": [1, 2], "coeffs": [3.0, 4.0]}
    )
    grouped = maybe_group_terms_polars(frame)
    # No pair repeats, so nothing may be aggregated: shape, column
    # order and coefficient values must all survive untouched.
    assert grouped.shape == (2, 3)
    assert grouped.columns == ["labels", "vars", "coeffs"]
    assert grouped["coeffs"].to_list() == [3.0, 4.0]
def test_maybe_group_terms_polars_with_duplicates() -> None:
    """Slow path: duplicate (labels, vars) pairs trigger group_by."""
    frame = pl.DataFrame(
        {"labels": [0, 0], "vars": [1, 1], "coeffs": [3.0, 4.0]}
    )
    grouped = maybe_group_terms_polars(frame)
    # The two identical (labels, vars) rows collapse into one,
    # with their coefficients summed (3.0 + 4.0 == 7.0).
    assert grouped.shape == (1, 3)
    assert grouped["coeffs"].to_list() == [7.0]

@@ -440,2 +440,16 @@ #!/usr/bin/env python3

def test_constraint_to_polars_mixed_signs(m: Model, x: linopy.Variable) -> None:
    """Test to_polars when a constraint has mixed sign values across dims."""
    # Build a plain constraint first, then overwrite its sign array in place
    # so the container holds more than one operator.
    m.add_constraints(x >= 0, name="mixed")
    con = m.constraints["mixed"]
    size = con.data.sizes["first"]
    # Alternate "<=" / ">=" along the first dimension (even indices get "<=").
    pattern = (["<=", ">="] * size)[:size]
    con.data["sign"] = xr.DataArray(np.array(pattern), dims=con.data["sign"].dims)
    frame = con.to_polars()
    assert isinstance(frame, pl.DataFrame)
    assert set(frame["sign"].to_list()) == {"<=", ">="}
def test_constraint_assignment_with_anonymous_constraints(

@@ -442,0 +456,0 @@ m: Model, x: linopy.Variable, y: linopy.Variable

@@ -339,1 +339,38 @@ #!/usr/bin/env python3

gurobipy.read(str(fn))
def test_to_file_lp_same_sign_constraints(tmp_path: Path) -> None:
    """Test LP writing when all constraints have the same sign operator."""
    model = Model()
    idx = np.arange(5)
    var = model.add_variables(coords=[idx], name="x")
    # Two upper bounds, so every row in the file uses "<=" only.
    model.add_constraints(var <= 10, name="upper")
    model.add_constraints(var <= 20, name="upper2")
    model.add_objective(var.sum())
    target = tmp_path / "same_sign.lp"
    model.to_file(target)
    text = target.read_text()
    assert "s.t." in text
    assert "<=" in text
def test_to_file_lp_mixed_sign_constraints(tmp_path: Path) -> None:
    """Test LP writing when constraints have different sign operators."""
    model = Model()
    idx = np.arange(5)
    var = model.add_variables(coords=[idx], name="x")
    # One constraint per operator so the writer has to emit all three.
    model.add_constraints(var <= 10, name="upper")
    model.add_constraints(var >= 1, name="lower")
    model.add_constraints(2 * var == 8, name="eq")
    model.add_objective(var.sum())
    target = tmp_path / "mixed_sign.lp"
    model.to_file(target)
    text = target.read_text()
    assert "s.t." in text
    assert "<=" in text
    assert ">=" in text
    assert "=" in text

@@ -1316,1 +1316,87 @@ #!/usr/bin/env python3

)
def test_constant_only_expression_mul_dataarray(m: Model) -> None:
    """A variable-free LinearExpression times a DataArray stays constant-only."""
    base = xr.DataArray([2, 3], dims=["dim_0"])
    factor = xr.DataArray([10, 20], dims=["dim_0"])
    expr = LinearExpression(base, m)
    assert expr.is_constant
    assert expr.nterm == 0
    want = base * factor
    # Multiplication must commute: same result from either side.
    for product in (expr * factor, factor * expr):
        assert isinstance(product, LinearExpression)
        assert product.is_constant
        assert (product.const == want).all()
def test_constant_only_expression_mul_linexpr_with_vars(m: Model, x: Variable) -> None:
    """Constant-only expression times an affine expression (1*x + 5), both orders."""
    base = xr.DataArray([2, 3], dims=["dim_0"])
    const_only = LinearExpression(base, m)
    assert const_only.is_constant
    assert const_only.nterm == 0
    affine = 1 * x + 5
    # The constant scales both the variable coefficients and the offset.
    want_coeffs = base
    want_const = base * 5
    for product in (const_only * affine, affine * const_only):
        assert isinstance(product, LinearExpression)
        assert (product.coeffs == want_coeffs).all()
        assert (product.const == want_const).all()
def test_constant_only_expression_mul_constant_only(m: Model) -> None:
    """Product of two constant-only expressions is again constant-only."""
    left_data = xr.DataArray([2, 3], dims=["dim_0"])
    right_data = xr.DataArray([4, 5], dims=["dim_0"])
    left = LinearExpression(left_data, m)
    right = LinearExpression(right_data, m)
    assert left.is_constant
    assert right.is_constant
    want = left_data * right_data
    # Commutativity check: both orderings give the elementwise product.
    for product in (left * right, right * left):
        assert isinstance(product, LinearExpression)
        assert product.is_constant
        assert (product.const == want).all()
def test_constant_only_expression_mul_linexpr_with_vars_and_const(
    m: Model, x: Variable
) -> None:
    """Constant-only expression times (4*x + 10) scales coeffs and offset alike."""
    base = xr.DataArray([2, 3], dims=["dim_0"])
    const_only = LinearExpression(base, m)
    assert const_only.is_constant
    affine = 4 * x + 10
    want_coeffs = base * 4
    want_const = base * 10
    # The result carries variables, so it is no longer constant-only.
    for product in (const_only * affine, affine * const_only):
        assert isinstance(product, LinearExpression)
        assert not product.is_constant
        assert (product.coeffs == want_coeffs).all()
        assert (product.const == want_const).all()

@@ -1094,2 +1094,66 @@ #!/usr/bin/env python3

@pytest.fixture
def auto_mask_variable_model() -> Model:
    """Model with auto_mask=True and NaN in variable bounds."""
    model = Model(auto_mask=True)
    x = model.add_variables(lower=0, coords=[range(10)], name="x")
    # The last two entries carry NaN lower bounds; with auto_mask=True the
    # corresponding "y" entries are masked out of the model.
    nan_lower = pd.Series([0.0] * 8 + [np.nan] * 2, range(10))
    y = model.add_variables(lower=nan_lower, name="y")
    model.add_constraints(x + y, GREATER_EQUAL, 10)
    model.add_constraints(y, GREATER_EQUAL, 0)
    model.add_objective(2 * x + y)
    return model
@pytest.fixture
def auto_mask_constraint_model() -> Model:
    """Model with auto_mask=True and NaN in constraint RHS."""
    model = Model(auto_mask=True)
    x = model.add_variables(lower=0, coords=[range(10)], name="x")
    y = model.add_variables(lower=0, coords=[range(10)], name="y")
    # NaN right-hand sides in the last two rows; with auto_mask=True those
    # constraint rows are masked, leaving only the "... >= 5" fallback there.
    rhs = pd.Series([10.0] * 8 + [np.nan] * 2, range(10))
    model.add_constraints(x + y, GREATER_EQUAL, rhs)
    model.add_constraints(x + y, GREATER_EQUAL, 5)
    model.add_objective(2 * x + y)
    return model
@pytest.mark.parametrize("solver,io_api,explicit_coordinate_names", params)
def test_auto_mask_variable_model(
    auto_mask_variable_model: Model,
    solver: str,
    io_api: str,
    explicit_coordinate_names: bool,
) -> None:
    """Test that auto_mask=True correctly masks variables with NaN bounds."""
    model = auto_mask_variable_model
    model.solve(
        solver, io_api=io_api, explicit_coordinate_names=explicit_coordinate_names
    )
    solution = model.variables.y.solution
    # Mirrors test_masked_variable_model: masked entries get no solution
    # value, while every unmasked entry does.
    assert solution[-2:].isnull().all()
    assert solution[:-2].notnull().all()
@pytest.mark.parametrize("solver,io_api,explicit_coordinate_names", params)
def test_auto_mask_constraint_model(
    auto_mask_constraint_model: Model,
    solver: str,
    io_api: str,
    explicit_coordinate_names: bool,
) -> None:
    """Test that auto_mask=True correctly masks constraints with NaN RHS."""
    model = auto_mask_constraint_model
    model.solve(
        solver, io_api=io_api, explicit_coordinate_names=explicit_coordinate_names
    )
    y = model.solution.y
    # Mirrors test_masked_constraint_model: unmasked rows bind at the 10-RHS
    # constraint, masked rows fall back to the ">= 5" constraint.
    assert (y[:-2] == 10).all()
    assert (y[-2:] == 5).all()
# def init_model_large():

@@ -1096,0 +1160,0 @@ # m = Model()

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display