ms2pip - PyPI Package Compare versions

+6

-6

ms2pip.egg-info/PKG-INFO

		@@ -1,7 +0,7 @@
		Metadata-Version: 2.2
		Metadata-Version: 2.4
		Name: ms2pip
		Version: 4.1.0
		Version: 4.1.1
		Summary: MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction.
		Author: Ana Sílvia C. Silva
		Author-email: Ralf Gabriels <ralf@gabriels.dev>, Sven Degroeve <sven.degroeve@ugent.be>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be>
		Author: Sven Degroeve, Ana Sílvia C. Silva
		Author-email: Ralf Gabriels <ralf@gabriels.dev>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be>
		License: Apache License
		@@ -230,4 +230,3 @@ Version 2.0, January 2004
		Requires-Dist: pyteomics<5,>=3.5
		Requires-Dist: tomlkit<1,>=0.5
		Requires-Dist: sqlalchemy<2,>=1.3
		Requires-Dist: sqlalchemy<3,>=1.4
		Requires-Dist: click<9,>=7
		@@ -258,2 +257,3 @@ Requires-Dist: xgboost<3,>=1.3
		Requires-Dist: sphinx-click; extra == "docs"
		Dynamic: license-file

		@@ -260,0 +260,0 @@ .. image:: https://github.com/compomics/ms2pip_c/raw/releases/img/ms2pip_logo_1000px.png

+1

-2

ms2pip.egg-info/requires.txt

		@@ -5,4 +5,3 @@ numpy<3,>=1.25
		pyteomics<5,>=3.5
		tomlkit<1,>=0.5
		sqlalchemy<2,>=1.3
		sqlalchemy<3,>=1.4
		click<9,>=7
		@@ -9,0 +8,0 @@ xgboost<3,>=1.3

+1

-1

ms2pip/__init__.py

		# isort: skip_file
		"""MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction."""

		__version__ = "4.1.0"
		__version__ = "4.1.1"

		@@ -6,0 +6,0 @@ from warnings import filterwarnings

+40

-34

ms2pip/_utils/dlib.py

		@@ -21,2 +21,3 @@ """Database configuration for EncyclopeDIA DLIB SQLite format."""
		from sqlalchemy.dialects.sqlite import BLOB
		from sqlalchemy.engine import Connection

		@@ -27,7 +28,8 @@ DLIB_VERSION = "0.1.14"
		class CompressedArray(TypeDecorator):
		""" Sqlite-like does not support arrays.
		Let's use a custom type decorator.
		"""Sqlite-like does not support arrays.
		Let's use a custom type decorator.

		See http://docs.sqlalchemy.org/en/latest/core/types.html#sqlalchemy.types.TypeDecorator
		See http://docs.sqlalchemy.org/en/latest/core/types.html#sqlalchemy.types.TypeDecorator
		"""

		impl = BLOB
		@@ -54,49 +56,53 @@

		big_float = numpy.dtype('>f4')
		big_double = numpy.dtype('>f8')
		big_float = numpy.dtype(">f4")
		big_double = numpy.dtype(">f8")

		Entry = Table(
		'entries',
		"entries",
		metadata,
		Column('PrecursorMz', Float, nullable=False, index=True),
		Column('PrecursorCharge', Integer, nullable=False),
		Column('PeptideModSeq', String, nullable=False),
		Column('PeptideSeq', String, nullable=False, index=True),
		Column('Copies', Integer, nullable=False),
		Column('RTInSeconds', Float, nullable=False),
		Column('Score', Float, nullable=False),
		Column('MassEncodedLength', Integer, nullable=False),
		Column('MassArray', CompressedArray(big_double), nullable=False),
		Column('IntensityEncodedLength', Integer, nullable=False),
		Column('IntensityArray', CompressedArray(big_float), nullable=False),
		Column('CorrelationEncodedLength', Integer, nullable=True),
		Column('CorrelationArray', CompressedArray(big_float), nullable=True),
		Column('RTInSecondsStart', Float, nullable=True),
		Column('RTInSecondsStop', Float, nullable=True),
		Column('MedianChromatogramEncodedLength', Integer, nullable=True),
		Column('MedianChromatogramArray', CompressedArray(big_float), nullable=True),
		Column('SourceFile', String, nullable=False),
		Column("PrecursorMz", Float, nullable=False, index=True),
		Column("PrecursorCharge", Integer, nullable=False),
		Column("PeptideModSeq", String, nullable=False),
		Column("PeptideSeq", String, nullable=False, index=True),
		Column("Copies", Integer, nullable=False),
		Column("RTInSeconds", Float, nullable=False),
		Column("Score", Float, nullable=False),
		Column("MassEncodedLength", Integer, nullable=False),
		Column("MassArray", CompressedArray(big_double), nullable=False),
		Column("IntensityEncodedLength", Integer, nullable=False),
		Column("IntensityArray", CompressedArray(big_float), nullable=False),
		Column("CorrelationEncodedLength", Integer, nullable=True),
		Column("CorrelationArray", CompressedArray(big_float), nullable=True),
		Column("RTInSecondsStart", Float, nullable=True),
		Column("RTInSecondsStop", Float, nullable=True),
		Column("MedianChromatogramEncodedLength", Integer, nullable=True),
		Column("MedianChromatogramArray", CompressedArray(big_float), nullable=True),
		Column("SourceFile", String, nullable=False),
		)

		Index('ix_entries_PeptideModSeq_PrecursorCharge_SourceFile', Entry.c.PeptideModSeq, Entry.c.PrecursorCharge, Entry.c.SourceFile)
		Index(
		"ix_entries_PeptideModSeq_PrecursorCharge_SourceFile",
		Entry.c.PeptideModSeq,
		Entry.c.PrecursorCharge,
		Entry.c.SourceFile,
		)

		PeptideToProtein = Table(
		'peptidetoprotein',
		"peptidetoprotein",
		metadata,
		Column('PeptideSeq', String, nullable=False, index=True),
		Column('isDecoy', Boolean, nullable=True),
		Column('ProteinAccession', String, nullable=False, index=True),
		Column("PeptideSeq", String, nullable=False, index=True),
		Column("isDecoy", Boolean, nullable=True),
		Column("ProteinAccession", String, nullable=False, index=True),
		)

		Metadata = Table(
		'metadata',
		"metadata",
		metadata,
		Column('Key', String, nullable=False, index=True),
		Column('Value', String, nullable=False),
		Column("Key", String, nullable=False, index=True),
		Column("Value", String, nullable=False),
		)


		def open_sqlite(filename: Union[str, Path]) -> sqlalchemy.engine.Connection:
		def open_sqlite(filename: Union[str, Path]) -> Connection:
		engine = sqlalchemy.create_engine(f"sqlite:///{filename}")
		metadata.bind = engine
		return engine.connect()

+1

-1

ms2pip/search_space.py

		@@ -18,3 +18,3 @@ """
		"min_length": 8,
		"max_length": 3,
		"max_length": 30,
		"cleavage_rule": "trypsin",
		@@ -21,0 +21,0 @@ "missed_cleavages": 2,

+14

-12

ms2pip/spectrum_output.py

		@@ -50,2 +50,3 @@ """
		from pathlib import Path
		from os import PathLike
		from time import localtime, strftime
		@@ -57,3 +58,4 @@ from typing import Any, Dict, Generator, List, Optional, Union
		from pyteomics import proforma
		from sqlalchemy import engine, select
		from sqlalchemy import select
		from sqlalchemy.engine import Connection

		@@ -67,3 +69,3 @@ from ms2pip._utils import dlib
		def write_spectra(
		filename: Union[str, Path],
		filename: Union[str, PathLike],
		processing_results: List[ProcessingResult],
		@@ -98,3 +100,3 @@ file_format: str = "tsv",

		def __init__(self, filename: Union[str, Path], write_mode: str = "w"):
		def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"):
		self.filename = Path(filename).with_suffix(self.suffix)
		@@ -473,3 +475,3 @@ self.write_mode = write_mode

		def __init__(self, filename: Union[str, Path], write_mode: str = "w"):
		def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"):
		super().__init__(filename, write_mode)
		@@ -626,3 +628,3 @@ self.ssl_file = self.filename.with_suffix(self.ssl_suffix)
		@staticmethod
		def _get_last_ssl_scan_number(ssl_file: Union[str, Path, StringIO]):
		def _get_last_ssl_scan_number(ssl_file: Union[str, PathLike, StringIO]):
		"""Read scan number of last line in a Bibliospec SSL file."""
		@@ -662,3 +664,3 @@ if isinstance(ssl_file, StringIO):
		connection = self._file_object
		dlib.metadata.create_all()
		dlib.metadata.create_all(connection.engine)
		self._write_metadata(connection)
		@@ -692,7 +694,7 @@ self._write_entries(processing_results, connection, self.filename)
		@staticmethod
		def _write_metadata(connection: engine.Connection):
		def _write_metadata(connection: Connection):
		"""Write metadata to DLIB SQLite file."""
		with connection.begin():
		version = connection.execute(
		select([dlib.Metadata.c.Value]).where(dlib.Metadata.c.Key == "version")
		select(dlib.Metadata.c.Value).where(dlib.Metadata.c.Key == "version")
		).scalar()
		@@ -710,4 +712,4 @@ if version is None:
		processing_results: List[ProcessingResult],
		connection: engine.Connection,
		output_filename: str,
		connection: Connection,
		output_filename: Union[str, PathLike],
		):
		@@ -742,3 +744,3 @@ """Write spectra to DLIB SQLite file."""
		@staticmethod
		def _write_peptide_to_protein(results: List[ProcessingResult], connection: engine.Connection):
		def _write_peptide_to_protein(results: List[ProcessingResult], connection: Connection):
		"""Write peptide-to-protein mappings to DLIB SQLite file."""
		@@ -756,3 +758,3 @@ peptide_to_proteins = {
		for peptide_to_protein in connection.execute(
		dlib.PeptideToProtein.select().where(
		select(dlib.PeptideToProtein).where(
		dlib.PeptideToProtein.c.ProteinAccession.in_(proteins)
		@@ -759,0 +761,0 @@ )

+6

-6

PKG-INFO

		@@ -1,7 +0,7 @@
		Metadata-Version: 2.2
		Metadata-Version: 2.4
		Name: ms2pip
		Version: 4.1.0
		Version: 4.1.1
		Summary: MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction.
		Author: Ana Sílvia C. Silva
		Author-email: Ralf Gabriels <ralf@gabriels.dev>, Sven Degroeve <sven.degroeve@ugent.be>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be>
		Author: Sven Degroeve, Ana Sílvia C. Silva
		Author-email: Ralf Gabriels <ralf@gabriels.dev>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be>
		License: Apache License
		@@ -230,4 +230,3 @@ Version 2.0, January 2004
		Requires-Dist: pyteomics<5,>=3.5
		Requires-Dist: tomlkit<1,>=0.5
		Requires-Dist: sqlalchemy<2,>=1.3
		Requires-Dist: sqlalchemy<3,>=1.4
		Requires-Dist: click<9,>=7
		@@ -258,2 +257,3 @@ Requires-Dist: xgboost<3,>=1.3
		Requires-Dist: sphinx-click; extra == "docs"
		Dynamic: license-file

		@@ -260,0 +260,0 @@ .. image:: https://github.com/compomics/ms2pip_c/raw/releases/img/ms2pip_logo_1000px.png

+6

-4

pyproject.toml

		@@ -19,3 +19,3 @@ [project]
		{ name = "Ralf Gabriels", email = "ralf@gabriels.dev" },
		{ name = "Sven Degroeve", email = "sven.degroeve@ugent.be" },
		{ name = "Sven Degroeve" },
		{ name = "Arthur Declercq", email = "arthur.declercq@ugent.be" },
		@@ -41,4 +41,3 @@ { name = "Kevin Velghe", email = "kevin.velghe@ugent.be" },
		"pyteomics>=3.5,<5",
		"tomlkit>=0.5,<1",
		"sqlalchemy>=1.3,<2",
		"sqlalchemy>=1.4,<3",
		"click>=7,<9",
		@@ -95,7 +94,10 @@ "xgboost>=1.3,<3",
		[tool.cibuildwheel]
		build = "cp39-manylinux_x86_64 cp39-win_amd64 cp39-macosx_x86_64 cp39-macosx_arm64"
		build = "cp3-manylinux_x86_64 cp3-win_amd64 cp3-macosx_x86_64 cp3-macosx_arm64"
		skip = "cp36-* cp37-* cp38-*"
		test-command = "ms2pip --help"

		# Prevent building from source for packages with complex C/C++ dependencies
		environment = { PIP_ONLY_BINARY = "pyarrow,pandas,numpy,lxml,xgboost" }

		[tool.cibuildwheel.macos]
		before-all = "brew install libomp"

+231

-2

tests/test_spectrum_output.py

		@@ -1,6 +0,12 @@
		from psm_utils import Peptidoform
		import tempfile
		from pathlib import Path

		from ms2pip.spectrum_output import MSP, Bibliospec, DLIB
		import numpy as np
		import pytest
		from psm_utils import PSM, Peptidoform

		from ms2pip.result import ProcessingResult
		from ms2pip.spectrum_output import DLIB, MSP, Bibliospec


		class TestMSP:
		@@ -48,1 +54,224 @@ def test__format_modification_string(self):
		assert DLIB._format_modified_sequence(Peptidoform(test_in)) == expected_out

		def test_dlib_database_creation(self):
		"""Test that DLIB file creation works with SQLAlchemy (integration test)."""
		# Create test data
		pep = Peptidoform("ACDE/2")
		psm = PSM(
		peptidoform=pep,
		spectrum_id=1,
		retention_time=100.0,
		protein_list=["PROT1", "PROT2"],
		)
		result = ProcessingResult(
		psm_index=0,
		psm=psm,
		theoretical_mz={
		"b": np.array([72.04435, 175.05354, 290.08047], dtype=np.float32),
		"y": np.array([148.0604, 263.0873, 366.0965], dtype=np.float32),
		},
		predicted_intensity={
		"b": np.array([0.1, 0.5, 0.3], dtype=np.float32),
		"y": np.array([0.8, 0.6, 0.2], dtype=np.float32),
		},
		observed_intensity=None,
		correlation=None,
		feature_vectors=None,
		)

		# Write DLIB file
		with tempfile.TemporaryDirectory() as tmpdir:
		dlib_file = Path(tmpdir) / "test.dlib"
		with DLIB(dlib_file) as writer:
		writer.write([result])

		# Verify file was created
		assert dlib_file.exists()

		# Verify database structure and content using SQLAlchemy
		from ms2pip._utils import dlib as dlib_module

		connection = dlib_module.open_sqlite(dlib_file)
		try:
		# Test that metadata table exists and has version
		from sqlalchemy import select

		version = connection.execute(
		select(dlib_module.Metadata.c.Value).where(
		dlib_module.Metadata.c.Key == "version"
		)
		).scalar()
		assert version == dlib_module.DLIB_VERSION

		# Test that Entry table has data (select specific columns to avoid nullable CompressedArray)
		from sqlalchemy import func

		entry_count = connection.execute(
		select(func.count()).select_from(dlib_module.Entry)
		).scalar()
		assert entry_count == 1

		# Select specific non-nullable columns
		entry = connection.execute(
		select(
		dlib_module.Entry.c.PeptideSeq,
		dlib_module.Entry.c.PrecursorCharge,
		dlib_module.Entry.c.RTInSeconds,
		dlib_module.Entry.c.MassArray,
		dlib_module.Entry.c.IntensityArray,
		)
		).fetchone()
		assert entry.PeptideSeq == "ACDE"
		assert entry.PrecursorCharge == 2
		assert entry.RTInSeconds == 100.0
		assert len(entry.MassArray) == 6 # 3 b-ions + 3 y-ions
		assert len(entry.IntensityArray) == 6

		# Test that PeptideToProtein table has data
		peptide_to_proteins = connection.execute(
		select(dlib_module.PeptideToProtein)
		).fetchall()
		assert len(peptide_to_proteins) == 2
		proteins = {p.ProteinAccession for p in peptide_to_proteins}
		assert proteins == {"PROT1", "PROT2"}
		assert all(p.PeptideSeq == "ACDE" for p in peptide_to_proteins)
		finally:
		connection.close()

		def test_dlib_multiple_results(self):
		"""Test writing multiple ProcessingResults to DLIB file."""
		# Create multiple test results
		results = []
		for i, seq in enumerate(["ACDE/2", "PEPTIDE/2", "TESTK/2"]):
		pep = Peptidoform(seq)
		psm = PSM(
		peptidoform=pep,
		spectrum_id=i,
		retention_time=100.0 + i * 10,
		protein_list=[f"PROT{i}"],
		)
		result = ProcessingResult(
		psm_index=i,
		psm=psm,
		theoretical_mz={
		"b": np.array([72.04435, 175.05354], dtype=np.float32),
		"y": np.array([148.0604, 263.0873], dtype=np.float32),
		},
		predicted_intensity={
		"b": np.array([0.1, 0.5], dtype=np.float32),
		"y": np.array([0.8, 0.6], dtype=np.float32),
		},
		observed_intensity=None,
		correlation=None,
		feature_vectors=None,
		)
		results.append(result)

		# Write DLIB file
		with tempfile.TemporaryDirectory() as tmpdir:
		dlib_file = Path(tmpdir) / "test_multiple.dlib"
		with DLIB(dlib_file) as writer:
		writer.write(results)

		# Verify all entries were written
		from sqlalchemy import select

		from ms2pip._utils import dlib as dlib_module

		connection = dlib_module.open_sqlite(dlib_file)
		try:
		# Select specific columns to avoid nullable CompressedArray
		entries = connection.execute(
		select(
		dlib_module.Entry.c.PeptideSeq,
		dlib_module.Entry.c.RTInSeconds,
		)
		).fetchall()
		assert len(entries) == 3

		peptides = {e.PeptideSeq for e in entries}
		assert peptides == {"ACDE", "PEPTIDE", "TESTK"}

		# Verify retention times
		rt_values = {e.RTInSeconds for e in entries}
		assert rt_values == {100.0, 110.0, 120.0}
		finally:
		connection.close()

		def test_dlib_sqlalchemy_select_syntax(self):
		"""Test that SQLAlchemy v2 select() syntax works correctly."""
		# This test specifically verifies the SQLAlchemy v2 compatibility changes
		pep = Peptidoform("ACDE/2")
		psm = PSM(
		peptidoform=pep,
		spectrum_id=1,
		retention_time=100.0,
		protein_list=["PROT1"],
		)
		result = ProcessingResult(
		psm_index=0,
		psm=psm,
		theoretical_mz={
		"b": np.array([72.04435], dtype=np.float32),
		"y": np.array([148.0604], dtype=np.float32),
		},
		predicted_intensity={
		"b": np.array([0.5], dtype=np.float32),
		"y": np.array([0.8], dtype=np.float32),
		},
		observed_intensity=None,
		correlation=None,
		feature_vectors=None,
		)

		with tempfile.TemporaryDirectory() as tmpdir:
		dlib_file = Path(tmpdir) / "test_sqlalchemy.dlib"
		with DLIB(dlib_file) as writer:
		writer.write([result])

		# Test the specific SQLAlchemy operations that were modified
		from sqlalchemy import select

		from ms2pip._utils import dlib as dlib_module

		connection = dlib_module.open_sqlite(dlib_file)
		try:
		# Test select(Table) syntax (changed from Table.select())
		peptide_to_protein_results = connection.execute(
		select(dlib_module.PeptideToProtein).where(
		dlib_module.PeptideToProtein.c.ProteinAccession == "PROT1"
		)
		).fetchall()
		assert len(peptide_to_protein_results) == 1
		assert peptide_to_protein_results[0].PeptideSeq == "ACDE"

		# Test select(column) syntax (changed from select([column]))
		version = connection.execute(
		select(dlib_module.Metadata.c.Value).where(
		dlib_module.Metadata.c.Key == "version"
		)
		).scalar()
		assert version is not None
		assert version == dlib_module.DLIB_VERSION
		finally:
		connection.close()

		def test_dlib_missing_retention_time(self):
		"""Test that DLIB writing raises error when retention time is missing."""
		pep = Peptidoform("ACDE/2")
		psm = PSM(peptidoform=pep, spectrum_id=1) # No retention_time
		result = ProcessingResult(
		psm_index=0,
		psm=psm,
		theoretical_mz={"b": np.array([72.04435], dtype=np.float32)},
		predicted_intensity={"b": np.array([0.5], dtype=np.float32)},
		observed_intensity=None,
		correlation=None,
		feature_vectors=None,
		)

		with tempfile.TemporaryDirectory() as tmpdir:
		dlib_file = Path(tmpdir) / "test_no_rt.dlib"
		with pytest.raises(ValueError, match="Retention time required"):
		with DLIB(dlib_file) as writer:
		writer.write([result])

ms2pip - PyPI Package Compare versions

Improved metrics