You're Invited: Meet the Socket Team at RSAC and BSidesSF 2026, March 23–26. RSVP
Socket
Book a Demo | Sign in
Socket

ms2pip

Package Overview
Dependencies
Maintainers
3
Versions
33
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

ms2pip - pypi Package Compare versions

Comparing version
4.1.0
to
4.1.1
+6
-6
ms2pip.egg-info/PKG-INFO

@@ -1,7 +0,7 @@

Metadata-Version: 2.2
Metadata-Version: 2.4
Name: ms2pip
Version: 4.1.0
Version: 4.1.1
Summary: MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction.
Author: Ana Sílvia C. Silva
Author-email: Ralf Gabriels <ralf@gabriels.dev>, Sven Degroeve <sven.degroeve@ugent.be>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be>
Author: Sven Degroeve, Ana Sílvia C. Silva
Author-email: Ralf Gabriels <ralf@gabriels.dev>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be>
License: Apache License

@@ -230,4 +230,3 @@ Version 2.0, January 2004

Requires-Dist: pyteomics<5,>=3.5
Requires-Dist: tomlkit<1,>=0.5
Requires-Dist: sqlalchemy<2,>=1.3
Requires-Dist: sqlalchemy<3,>=1.4
Requires-Dist: click<9,>=7

@@ -258,2 +257,3 @@ Requires-Dist: xgboost<3,>=1.3

Requires-Dist: sphinx-click; extra == "docs"
Dynamic: license-file

@@ -260,0 +260,0 @@ .. image:: https://github.com/compomics/ms2pip_c/raw/releases/img/ms2pip_logo_1000px.png

@@ -5,4 +5,3 @@ numpy<3,>=1.25

pyteomics<5,>=3.5
tomlkit<1,>=0.5
sqlalchemy<2,>=1.3
sqlalchemy<3,>=1.4
click<9,>=7

@@ -9,0 +8,0 @@ xgboost<3,>=1.3

# isort: skip_file
"""MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction."""
__version__ = "4.1.0"
__version__ = "4.1.1"

@@ -6,0 +6,0 @@ from warnings import filterwarnings

@@ -21,2 +21,3 @@ """Database configuration for EncyclopeDIA DLIB SQLite format."""

from sqlalchemy.dialects.sqlite import BLOB
from sqlalchemy.engine import Connection

@@ -27,7 +28,8 @@ DLIB_VERSION = "0.1.14"

class CompressedArray(TypeDecorator):
""" Sqlite-like does not support arrays.
Let's use a custom type decorator.
"""Sqlite-like does not support arrays.
Let's use a custom type decorator.
See http://docs.sqlalchemy.org/en/latest/core/types.html#sqlalchemy.types.TypeDecorator
See http://docs.sqlalchemy.org/en/latest/core/types.html#sqlalchemy.types.TypeDecorator
"""
impl = BLOB

@@ -54,49 +56,53 @@

big_float = numpy.dtype('>f4')
big_double = numpy.dtype('>f8')
big_float = numpy.dtype(">f4")
big_double = numpy.dtype(">f8")
Entry = Table(
'entries',
"entries",
metadata,
Column('PrecursorMz', Float, nullable=False, index=True),
Column('PrecursorCharge', Integer, nullable=False),
Column('PeptideModSeq', String, nullable=False),
Column('PeptideSeq', String, nullable=False, index=True),
Column('Copies', Integer, nullable=False),
Column('RTInSeconds', Float, nullable=False),
Column('Score', Float, nullable=False),
Column('MassEncodedLength', Integer, nullable=False),
Column('MassArray', CompressedArray(big_double), nullable=False),
Column('IntensityEncodedLength', Integer, nullable=False),
Column('IntensityArray', CompressedArray(big_float), nullable=False),
Column('CorrelationEncodedLength', Integer, nullable=True),
Column('CorrelationArray', CompressedArray(big_float), nullable=True),
Column('RTInSecondsStart', Float, nullable=True),
Column('RTInSecondsStop', Float, nullable=True),
Column('MedianChromatogramEncodedLength', Integer, nullable=True),
Column('MedianChromatogramArray', CompressedArray(big_float), nullable=True),
Column('SourceFile', String, nullable=False),
Column("PrecursorMz", Float, nullable=False, index=True),
Column("PrecursorCharge", Integer, nullable=False),
Column("PeptideModSeq", String, nullable=False),
Column("PeptideSeq", String, nullable=False, index=True),
Column("Copies", Integer, nullable=False),
Column("RTInSeconds", Float, nullable=False),
Column("Score", Float, nullable=False),
Column("MassEncodedLength", Integer, nullable=False),
Column("MassArray", CompressedArray(big_double), nullable=False),
Column("IntensityEncodedLength", Integer, nullable=False),
Column("IntensityArray", CompressedArray(big_float), nullable=False),
Column("CorrelationEncodedLength", Integer, nullable=True),
Column("CorrelationArray", CompressedArray(big_float), nullable=True),
Column("RTInSecondsStart", Float, nullable=True),
Column("RTInSecondsStop", Float, nullable=True),
Column("MedianChromatogramEncodedLength", Integer, nullable=True),
Column("MedianChromatogramArray", CompressedArray(big_float), nullable=True),
Column("SourceFile", String, nullable=False),
)
Index('ix_entries_PeptideModSeq_PrecursorCharge_SourceFile', Entry.c.PeptideModSeq, Entry.c.PrecursorCharge, Entry.c.SourceFile)
Index(
"ix_entries_PeptideModSeq_PrecursorCharge_SourceFile",
Entry.c.PeptideModSeq,
Entry.c.PrecursorCharge,
Entry.c.SourceFile,
)
PeptideToProtein = Table(
'peptidetoprotein',
"peptidetoprotein",
metadata,
Column('PeptideSeq', String, nullable=False, index=True),
Column('isDecoy', Boolean, nullable=True),
Column('ProteinAccession', String, nullable=False, index=True),
Column("PeptideSeq", String, nullable=False, index=True),
Column("isDecoy", Boolean, nullable=True),
Column("ProteinAccession", String, nullable=False, index=True),
)
Metadata = Table(
'metadata',
"metadata",
metadata,
Column('Key', String, nullable=False, index=True),
Column('Value', String, nullable=False),
Column("Key", String, nullable=False, index=True),
Column("Value", String, nullable=False),
)
def open_sqlite(filename: Union[str, Path]) -> sqlalchemy.engine.Connection:
def open_sqlite(filename: Union[str, Path]) -> Connection:
engine = sqlalchemy.create_engine(f"sqlite:///{filename}")
metadata.bind = engine
return engine.connect()

@@ -18,3 +18,3 @@ """

"min_length": 8,
"max_length": 3,
"max_length": 30,
"cleavage_rule": "trypsin",

@@ -21,0 +21,0 @@ "missed_cleavages": 2,

@@ -50,2 +50,3 @@ """

from pathlib import Path
from os import PathLike
from time import localtime, strftime

@@ -57,3 +58,4 @@ from typing import Any, Dict, Generator, List, Optional, Union

from pyteomics import proforma
from sqlalchemy import engine, select
from sqlalchemy import select
from sqlalchemy.engine import Connection

@@ -67,3 +69,3 @@ from ms2pip._utils import dlib

def write_spectra(
filename: Union[str, Path],
filename: Union[str, PathLike],
processing_results: List[ProcessingResult],

@@ -98,3 +100,3 @@ file_format: str = "tsv",

def __init__(self, filename: Union[str, Path], write_mode: str = "w"):
def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"):
self.filename = Path(filename).with_suffix(self.suffix)

@@ -473,3 +475,3 @@ self.write_mode = write_mode

def __init__(self, filename: Union[str, Path], write_mode: str = "w"):
def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"):
super().__init__(filename, write_mode)

@@ -626,3 +628,3 @@ self.ssl_file = self.filename.with_suffix(self.ssl_suffix)

@staticmethod
def _get_last_ssl_scan_number(ssl_file: Union[str, Path, StringIO]):
def _get_last_ssl_scan_number(ssl_file: Union[str, PathLike, StringIO]):
"""Read scan number of last line in a Bibliospec SSL file."""

@@ -662,3 +664,3 @@ if isinstance(ssl_file, StringIO):

connection = self._file_object
dlib.metadata.create_all()
dlib.metadata.create_all(connection.engine)
self._write_metadata(connection)

@@ -692,7 +694,7 @@ self._write_entries(processing_results, connection, self.filename)

@staticmethod
def _write_metadata(connection: engine.Connection):
def _write_metadata(connection: Connection):
"""Write metadata to DLIB SQLite file."""
with connection.begin():
version = connection.execute(
select([dlib.Metadata.c.Value]).where(dlib.Metadata.c.Key == "version")
select(dlib.Metadata.c.Value).where(dlib.Metadata.c.Key == "version")
).scalar()

@@ -710,4 +712,4 @@ if version is None:

processing_results: List[ProcessingResult],
connection: engine.Connection,
output_filename: str,
connection: Connection,
output_filename: Union[str, PathLike],
):

@@ -742,3 +744,3 @@ """Write spectra to DLIB SQLite file."""

@staticmethod
def _write_peptide_to_protein(results: List[ProcessingResult], connection: engine.Connection):
def _write_peptide_to_protein(results: List[ProcessingResult], connection: Connection):
"""Write peptide-to-protein mappings to DLIB SQLite file."""

@@ -756,3 +758,3 @@ peptide_to_proteins = {

for peptide_to_protein in connection.execute(
dlib.PeptideToProtein.select().where(
select(dlib.PeptideToProtein).where(
dlib.PeptideToProtein.c.ProteinAccession.in_(proteins)

@@ -759,0 +761,0 @@ )

@@ -1,7 +0,7 @@

Metadata-Version: 2.2
Metadata-Version: 2.4
Name: ms2pip
Version: 4.1.0
Version: 4.1.1
Summary: MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction.
Author: Ana Sílvia C. Silva
Author-email: Ralf Gabriels <ralf@gabriels.dev>, Sven Degroeve <sven.degroeve@ugent.be>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be>
Author: Sven Degroeve, Ana Sílvia C. Silva
Author-email: Ralf Gabriels <ralf@gabriels.dev>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be>
License: Apache License

@@ -230,4 +230,3 @@ Version 2.0, January 2004

Requires-Dist: pyteomics<5,>=3.5
Requires-Dist: tomlkit<1,>=0.5
Requires-Dist: sqlalchemy<2,>=1.3
Requires-Dist: sqlalchemy<3,>=1.4
Requires-Dist: click<9,>=7

@@ -258,2 +257,3 @@ Requires-Dist: xgboost<3,>=1.3

Requires-Dist: sphinx-click; extra == "docs"
Dynamic: license-file

@@ -260,0 +260,0 @@ .. image:: https://github.com/compomics/ms2pip_c/raw/releases/img/ms2pip_logo_1000px.png

@@ -19,3 +19,3 @@ [project]

{ name = "Ralf Gabriels", email = "ralf@gabriels.dev" },
{ name = "Sven Degroeve", email = "sven.degroeve@ugent.be" },
{ name = "Sven Degroeve" },
{ name = "Arthur Declercq", email = "arthur.declercq@ugent.be" },

@@ -41,4 +41,3 @@ { name = "Kevin Velghe", email = "kevin.velghe@ugent.be" },

"pyteomics>=3.5,<5",
"tomlkit>=0.5,<1",
"sqlalchemy>=1.3,<2",
"sqlalchemy>=1.4,<3",
"click>=7,<9",

@@ -95,7 +94,10 @@ "xgboost>=1.3,<3",

[tool.cibuildwheel]
build = "cp39*-manylinux_x86_64 cp39*-win_amd64 cp39*-macosx_x86_64 cp39*-macosx_arm64"
build = "cp3*-manylinux_x86_64 cp3*-win_amd64 cp3*-macosx_x86_64 cp3*-macosx_arm64"
skip = "cp36-* cp37-* cp38-*"
test-command = "ms2pip --help"
# Prevent building from source for packages with complex C/C++ dependencies
environment = { PIP_ONLY_BINARY = "pyarrow,pandas,numpy,lxml,xgboost" }
[tool.cibuildwheel.macos]
before-all = "brew install libomp"

@@ -1,6 +0,12 @@

from psm_utils import Peptidoform
import tempfile
from pathlib import Path
from ms2pip.spectrum_output import MSP, Bibliospec, DLIB
import numpy as np
import pytest
from psm_utils import PSM, Peptidoform
from ms2pip.result import ProcessingResult
from ms2pip.spectrum_output import DLIB, MSP, Bibliospec
class TestMSP:

@@ -48,1 +54,224 @@ def test__format_modification_string(self):

assert DLIB._format_modified_sequence(Peptidoform(test_in)) == expected_out
def test_dlib_database_creation(self):
    """Write a single result to a DLIB file and read it back via SQLAlchemy.

    Integration test covering the three DLIB tables: the metadata version
    row, the single spectrum entry, and the peptide-to-protein mappings.
    """
    from sqlalchemy import func, select

    from ms2pip._utils import dlib as dlib_module

    # One peptide with three b- and three y-ions, mapped to two proteins.
    fragment_mz = {
        "b": np.array([72.04435, 175.05354, 290.08047], dtype=np.float32),
        "y": np.array([148.0604, 263.0873, 366.0965], dtype=np.float32),
    }
    fragment_intensity = {
        "b": np.array([0.1, 0.5, 0.3], dtype=np.float32),
        "y": np.array([0.8, 0.6, 0.2], dtype=np.float32),
    }
    result = ProcessingResult(
        psm_index=0,
        psm=PSM(
            peptidoform=Peptidoform("ACDE/2"),
            spectrum_id=1,
            retention_time=100.0,
            protein_list=["PROT1", "PROT2"],
        ),
        theoretical_mz=fragment_mz,
        predicted_intensity=fragment_intensity,
        observed_intensity=None,
        correlation=None,
        feature_vectors=None,
    )

    with tempfile.TemporaryDirectory() as tmpdir:
        dlib_file = Path(tmpdir) / "test.dlib"
        with DLIB(dlib_file) as writer:
            writer.write([result])

        # The writer must have produced the file on disk.
        assert dlib_file.exists()

        # Leaving the ``with`` block closes the connection, also on failure.
        with dlib_module.open_sqlite(dlib_file) as connection:
            # Metadata table: the stored version matches the module constant.
            version_query = select(dlib_module.Metadata.c.Value).where(
                dlib_module.Metadata.c.Key == "version"
            )
            stored_version = connection.execute(version_query).scalar()
            assert stored_version == dlib_module.DLIB_VERSION

            # Entry table: exactly one row was written.
            count_query = select(func.count()).select_from(dlib_module.Entry)
            assert connection.execute(count_query).scalar() == 1

            # Only specific columns are selected, to avoid the nullable
            # CompressedArray columns.
            entry_columns = dlib_module.Entry.c
            row = connection.execute(
                select(
                    entry_columns.PeptideSeq,
                    entry_columns.PrecursorCharge,
                    entry_columns.RTInSeconds,
                    entry_columns.MassArray,
                    entry_columns.IntensityArray,
                )
            ).fetchone()
            assert row.PeptideSeq == "ACDE"
            assert row.PrecursorCharge == 2
            assert row.RTInSeconds == 100.0
            assert len(row.MassArray) == 6  # 3 b-ions + 3 y-ions
            assert len(row.IntensityArray) == 6

            # PeptideToProtein table: one mapping per protein accession.
            mappings = connection.execute(
                select(dlib_module.PeptideToProtein)
            ).fetchall()
            assert len(mappings) == 2
            accessions = {mapping.ProteinAccession for mapping in mappings}
            assert accessions == {"PROT1", "PROT2"}
            assert all(mapping.PeptideSeq == "ACDE" for mapping in mappings)
def test_dlib_multiple_results(self):
    """Write three ProcessingResults to one DLIB file and verify all are stored."""
    from sqlalchemy import select

    from ms2pip._utils import dlib as dlib_module

    def build_result(index, sequence):
        # Each result gets its own arrays, a retention time 10 s after the
        # previous one, and a single protein named after its index.
        return ProcessingResult(
            psm_index=index,
            psm=PSM(
                peptidoform=Peptidoform(sequence),
                spectrum_id=index,
                retention_time=100.0 + index * 10,
                protein_list=[f"PROT{index}"],
            ),
            theoretical_mz={
                "b": np.array([72.04435, 175.05354], dtype=np.float32),
                "y": np.array([148.0604, 263.0873], dtype=np.float32),
            },
            predicted_intensity={
                "b": np.array([0.1, 0.5], dtype=np.float32),
                "y": np.array([0.8, 0.6], dtype=np.float32),
            },
            observed_intensity=None,
            correlation=None,
            feature_vectors=None,
        )

    results = [
        build_result(index, sequence)
        for index, sequence in enumerate(["ACDE/2", "PEPTIDE/2", "TESTK/2"])
    ]

    with tempfile.TemporaryDirectory() as tmpdir:
        dlib_file = Path(tmpdir) / "test_multiple.dlib"
        with DLIB(dlib_file) as writer:
            writer.write(results)

        # Leaving the ``with`` block closes the connection, also on failure.
        with dlib_module.open_sqlite(dlib_file) as connection:
            # Only specific columns are selected, to avoid the nullable
            # CompressedArray columns.
            rows = connection.execute(
                select(
                    dlib_module.Entry.c.PeptideSeq,
                    dlib_module.Entry.c.RTInSeconds,
                )
            ).fetchall()

            # One entry per input result, with matching sequences.
            assert len(rows) == 3
            stored_peptides = {row.PeptideSeq for row in rows}
            assert stored_peptides == {"ACDE", "PEPTIDE", "TESTK"}

            # Retention times match what was written.
            stored_rts = {row.RTInSeconds for row in rows}
            assert stored_rts == {100.0, 110.0, 120.0}
def test_dlib_sqlalchemy_select_syntax(self):
    """Check the SQLAlchemy v2 ``select()`` call forms against a written DLIB file.

    Covers ``select(Table)`` (previously ``Table.select()``) and
    ``select(column)`` (previously ``select([column])``).
    """
    from sqlalchemy import select

    from ms2pip._utils import dlib as dlib_module

    # Minimal fixture: one peptide, one b-ion, one y-ion, one protein.
    psm = PSM(
        peptidoform=Peptidoform("ACDE/2"),
        spectrum_id=1,
        retention_time=100.0,
        protein_list=["PROT1"],
    )
    result = ProcessingResult(
        psm_index=0,
        psm=psm,
        theoretical_mz={
            "b": np.array([72.04435], dtype=np.float32),
            "y": np.array([148.0604], dtype=np.float32),
        },
        predicted_intensity={
            "b": np.array([0.5], dtype=np.float32),
            "y": np.array([0.8], dtype=np.float32),
        },
        observed_intensity=None,
        correlation=None,
        feature_vectors=None,
    )

    with tempfile.TemporaryDirectory() as tmpdir:
        dlib_file = Path(tmpdir) / "test_sqlalchemy.dlib"
        with DLIB(dlib_file) as writer:
            writer.write([result])

        # Leaving the ``with`` block closes the connection, also on failure.
        with dlib_module.open_sqlite(dlib_file) as connection:
            # select(Table) form, filtered on the protein accession.
            mapping_query = select(dlib_module.PeptideToProtein).where(
                dlib_module.PeptideToProtein.c.ProteinAccession == "PROT1"
            )
            mappings = connection.execute(mapping_query).fetchall()
            assert len(mappings) == 1
            assert mappings[0].PeptideSeq == "ACDE"

            # select(column) form, reading the stored DLIB version.
            version_query = select(dlib_module.Metadata.c.Value).where(
                dlib_module.Metadata.c.Key == "version"
            )
            stored_version = connection.execute(version_query).scalar()
            assert stored_version is not None
            assert stored_version == dlib_module.DLIB_VERSION
def test_dlib_missing_retention_time(self):
    """Expect a ValueError when a result without retention time is written to DLIB."""
    # The PSM is deliberately built without ``retention_time``.
    psm_without_rt = PSM(peptidoform=Peptidoform("ACDE/2"), spectrum_id=1)
    result = ProcessingResult(
        psm_index=0,
        psm=psm_without_rt,
        theoretical_mz={"b": np.array([72.04435], dtype=np.float32)},
        predicted_intensity={"b": np.array([0.5], dtype=np.float32)},
        observed_intensity=None,
        correlation=None,
        feature_vectors=None,
    )

    with tempfile.TemporaryDirectory() as tmpdir:
        dlib_file = Path(tmpdir) / "test_no_rt.dlib"
        # The writer is opened inside the raises-context, so an error raised
        # while opening, writing or closing is caught by it.
        with pytest.raises(ValueError, match="Retention time required"), DLIB(
            dlib_file
        ) as writer:
            writer.write([result])