ms2pip
Advanced tools
@@ -1,7 +0,7 @@ | ||
| Metadata-Version: 2.2 | ||
| Metadata-Version: 2.4 | ||
| Name: ms2pip | ||
| Version: 4.1.0 | ||
| Version: 4.1.1 | ||
| Summary: MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction. | ||
| Author: Ana Sílvia C. Silva | ||
| Author-email: Ralf Gabriels <ralf@gabriels.dev>, Sven Degroeve <sven.degroeve@ugent.be>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be> | ||
| Author: Sven Degroeve, Ana Sílvia C. Silva | ||
| Author-email: Ralf Gabriels <ralf@gabriels.dev>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be> | ||
| License: Apache License | ||
@@ -230,4 +230,3 @@ Version 2.0, January 2004 | ||
| Requires-Dist: pyteomics<5,>=3.5 | ||
| Requires-Dist: tomlkit<1,>=0.5 | ||
| Requires-Dist: sqlalchemy<2,>=1.3 | ||
| Requires-Dist: sqlalchemy<3,>=1.4 | ||
| Requires-Dist: click<9,>=7 | ||
@@ -258,2 +257,3 @@ Requires-Dist: xgboost<3,>=1.3 | ||
| Requires-Dist: sphinx-click; extra == "docs" | ||
| Dynamic: license-file | ||
@@ -260,0 +260,0 @@ .. image:: https://github.com/compomics/ms2pip_c/raw/releases/img/ms2pip_logo_1000px.png |
@@ -5,4 +5,3 @@ numpy<3,>=1.25 | ||
| pyteomics<5,>=3.5 | ||
| tomlkit<1,>=0.5 | ||
| sqlalchemy<2,>=1.3 | ||
| sqlalchemy<3,>=1.4 | ||
| click<9,>=7 | ||
@@ -9,0 +8,0 @@ xgboost<3,>=1.3 |
| # isort: skip_file | ||
| """MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction.""" | ||
| __version__ = "4.1.0" | ||
| __version__ = "4.1.1" | ||
@@ -6,0 +6,0 @@ from warnings import filterwarnings |
+40
-34
@@ -21,2 +21,3 @@ """Database configuration for EncyclopeDIA DLIB SQLite format.""" | ||
| from sqlalchemy.dialects.sqlite import BLOB | ||
| from sqlalchemy.engine import Connection | ||
@@ -27,7 +28,8 @@ DLIB_VERSION = "0.1.14" | ||
| class CompressedArray(TypeDecorator): | ||
| """ Sqlite-like does not support arrays. | ||
| Let's use a custom type decorator. | ||
| """Sqlite-like does not support arrays. | ||
| Let's use a custom type decorator. | ||
| See http://docs.sqlalchemy.org/en/latest/core/types.html#sqlalchemy.types.TypeDecorator | ||
| See http://docs.sqlalchemy.org/en/latest/core/types.html#sqlalchemy.types.TypeDecorator | ||
| """ | ||
| impl = BLOB | ||
@@ -54,49 +56,53 @@ | ||
| big_float = numpy.dtype('>f4') | ||
| big_double = numpy.dtype('>f8') | ||
| big_float = numpy.dtype(">f4") | ||
| big_double = numpy.dtype(">f8") | ||
| Entry = Table( | ||
| 'entries', | ||
| "entries", | ||
| metadata, | ||
| Column('PrecursorMz', Float, nullable=False, index=True), | ||
| Column('PrecursorCharge', Integer, nullable=False), | ||
| Column('PeptideModSeq', String, nullable=False), | ||
| Column('PeptideSeq', String, nullable=False, index=True), | ||
| Column('Copies', Integer, nullable=False), | ||
| Column('RTInSeconds', Float, nullable=False), | ||
| Column('Score', Float, nullable=False), | ||
| Column('MassEncodedLength', Integer, nullable=False), | ||
| Column('MassArray', CompressedArray(big_double), nullable=False), | ||
| Column('IntensityEncodedLength', Integer, nullable=False), | ||
| Column('IntensityArray', CompressedArray(big_float), nullable=False), | ||
| Column('CorrelationEncodedLength', Integer, nullable=True), | ||
| Column('CorrelationArray', CompressedArray(big_float), nullable=True), | ||
| Column('RTInSecondsStart', Float, nullable=True), | ||
| Column('RTInSecondsStop', Float, nullable=True), | ||
| Column('MedianChromatogramEncodedLength', Integer, nullable=True), | ||
| Column('MedianChromatogramArray', CompressedArray(big_float), nullable=True), | ||
| Column('SourceFile', String, nullable=False), | ||
| Column("PrecursorMz", Float, nullable=False, index=True), | ||
| Column("PrecursorCharge", Integer, nullable=False), | ||
| Column("PeptideModSeq", String, nullable=False), | ||
| Column("PeptideSeq", String, nullable=False, index=True), | ||
| Column("Copies", Integer, nullable=False), | ||
| Column("RTInSeconds", Float, nullable=False), | ||
| Column("Score", Float, nullable=False), | ||
| Column("MassEncodedLength", Integer, nullable=False), | ||
| Column("MassArray", CompressedArray(big_double), nullable=False), | ||
| Column("IntensityEncodedLength", Integer, nullable=False), | ||
| Column("IntensityArray", CompressedArray(big_float), nullable=False), | ||
| Column("CorrelationEncodedLength", Integer, nullable=True), | ||
| Column("CorrelationArray", CompressedArray(big_float), nullable=True), | ||
| Column("RTInSecondsStart", Float, nullable=True), | ||
| Column("RTInSecondsStop", Float, nullable=True), | ||
| Column("MedianChromatogramEncodedLength", Integer, nullable=True), | ||
| Column("MedianChromatogramArray", CompressedArray(big_float), nullable=True), | ||
| Column("SourceFile", String, nullable=False), | ||
| ) | ||
| Index('ix_entries_PeptideModSeq_PrecursorCharge_SourceFile', Entry.c.PeptideModSeq, Entry.c.PrecursorCharge, Entry.c.SourceFile) | ||
| Index( | ||
| "ix_entries_PeptideModSeq_PrecursorCharge_SourceFile", | ||
| Entry.c.PeptideModSeq, | ||
| Entry.c.PrecursorCharge, | ||
| Entry.c.SourceFile, | ||
| ) | ||
| PeptideToProtein = Table( | ||
| 'peptidetoprotein', | ||
| "peptidetoprotein", | ||
| metadata, | ||
| Column('PeptideSeq', String, nullable=False, index=True), | ||
| Column('isDecoy', Boolean, nullable=True), | ||
| Column('ProteinAccession', String, nullable=False, index=True), | ||
| Column("PeptideSeq", String, nullable=False, index=True), | ||
| Column("isDecoy", Boolean, nullable=True), | ||
| Column("ProteinAccession", String, nullable=False, index=True), | ||
| ) | ||
| Metadata = Table( | ||
| 'metadata', | ||
| "metadata", | ||
| metadata, | ||
| Column('Key', String, nullable=False, index=True), | ||
| Column('Value', String, nullable=False), | ||
| Column("Key", String, nullable=False, index=True), | ||
| Column("Value", String, nullable=False), | ||
| ) | ||
| def open_sqlite(filename: Union[str, Path]) -> sqlalchemy.engine.Connection: | ||
| def open_sqlite(filename: Union[str, Path]) -> Connection: | ||
| engine = sqlalchemy.create_engine(f"sqlite:///{filename}") | ||
| metadata.bind = engine | ||
| return engine.connect() |
@@ -18,3 +18,3 @@ """ | ||
| "min_length": 8, | ||
| "max_length": 3, | ||
| "max_length": 30, | ||
| "cleavage_rule": "trypsin", | ||
@@ -21,0 +21,0 @@ "missed_cleavages": 2, |
@@ -50,2 +50,3 @@ """ | ||
| from pathlib import Path | ||
| from os import PathLike | ||
| from time import localtime, strftime | ||
@@ -57,3 +58,4 @@ from typing import Any, Dict, Generator, List, Optional, Union | ||
| from pyteomics import proforma | ||
| from sqlalchemy import engine, select | ||
| from sqlalchemy import select | ||
| from sqlalchemy.engine import Connection | ||
@@ -67,3 +69,3 @@ from ms2pip._utils import dlib | ||
| def write_spectra( | ||
| filename: Union[str, Path], | ||
| filename: Union[str, PathLike], | ||
| processing_results: List[ProcessingResult], | ||
@@ -98,3 +100,3 @@ file_format: str = "tsv", | ||
| def __init__(self, filename: Union[str, Path], write_mode: str = "w"): | ||
| def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"): | ||
| self.filename = Path(filename).with_suffix(self.suffix) | ||
@@ -473,3 +475,3 @@ self.write_mode = write_mode | ||
| def __init__(self, filename: Union[str, Path], write_mode: str = "w"): | ||
| def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"): | ||
| super().__init__(filename, write_mode) | ||
@@ -626,3 +628,3 @@ self.ssl_file = self.filename.with_suffix(self.ssl_suffix) | ||
| @staticmethod | ||
| def _get_last_ssl_scan_number(ssl_file: Union[str, Path, StringIO]): | ||
| def _get_last_ssl_scan_number(ssl_file: Union[str, PathLike, StringIO]): | ||
| """Read scan number of last line in a Bibliospec SSL file.""" | ||
@@ -662,3 +664,3 @@ if isinstance(ssl_file, StringIO): | ||
| connection = self._file_object | ||
| dlib.metadata.create_all() | ||
| dlib.metadata.create_all(connection.engine) | ||
| self._write_metadata(connection) | ||
@@ -692,7 +694,7 @@ self._write_entries(processing_results, connection, self.filename) | ||
| @staticmethod | ||
| def _write_metadata(connection: engine.Connection): | ||
| def _write_metadata(connection: Connection): | ||
| """Write metadata to DLIB SQLite file.""" | ||
| with connection.begin(): | ||
| version = connection.execute( | ||
| select([dlib.Metadata.c.Value]).where(dlib.Metadata.c.Key == "version") | ||
| select(dlib.Metadata.c.Value).where(dlib.Metadata.c.Key == "version") | ||
| ).scalar() | ||
@@ -710,4 +712,4 @@ if version is None: | ||
| processing_results: List[ProcessingResult], | ||
| connection: engine.Connection, | ||
| output_filename: str, | ||
| connection: Connection, | ||
| output_filename: Union[str, PathLike], | ||
| ): | ||
@@ -742,3 +744,3 @@ """Write spectra to DLIB SQLite file.""" | ||
| @staticmethod | ||
| def _write_peptide_to_protein(results: List[ProcessingResult], connection: engine.Connection): | ||
| def _write_peptide_to_protein(results: List[ProcessingResult], connection: Connection): | ||
| """Write peptide-to-protein mappings to DLIB SQLite file.""" | ||
@@ -756,3 +758,3 @@ peptide_to_proteins = { | ||
| for peptide_to_protein in connection.execute( | ||
| dlib.PeptideToProtein.select().where( | ||
| select(dlib.PeptideToProtein).where( | ||
| dlib.PeptideToProtein.c.ProteinAccession.in_(proteins) | ||
@@ -759,0 +761,0 @@ ) |
+6
-6
@@ -1,7 +0,7 @@ | ||
| Metadata-Version: 2.2 | ||
| Metadata-Version: 2.4 | ||
| Name: ms2pip | ||
| Version: 4.1.0 | ||
| Version: 4.1.1 | ||
| Summary: MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction. | ||
| Author: Ana Sílvia C. Silva | ||
| Author-email: Ralf Gabriels <ralf@gabriels.dev>, Sven Degroeve <sven.degroeve@ugent.be>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be> | ||
| Author: Sven Degroeve, Ana Sílvia C. Silva | ||
| Author-email: Ralf Gabriels <ralf@gabriels.dev>, Arthur Declercq <arthur.declercq@ugent.be>, Kevin Velghe <kevin.velghe@ugent.be> | ||
| License: Apache License | ||
@@ -230,4 +230,3 @@ Version 2.0, January 2004 | ||
| Requires-Dist: pyteomics<5,>=3.5 | ||
| Requires-Dist: tomlkit<1,>=0.5 | ||
| Requires-Dist: sqlalchemy<2,>=1.3 | ||
| Requires-Dist: sqlalchemy<3,>=1.4 | ||
| Requires-Dist: click<9,>=7 | ||
@@ -258,2 +257,3 @@ Requires-Dist: xgboost<3,>=1.3 | ||
| Requires-Dist: sphinx-click; extra == "docs" | ||
| Dynamic: license-file | ||
@@ -260,0 +260,0 @@ .. image:: https://github.com/compomics/ms2pip_c/raw/releases/img/ms2pip_logo_1000px.png |
+6
-4
@@ -19,3 +19,3 @@ [project] | ||
| { name = "Ralf Gabriels", email = "ralf@gabriels.dev" }, | ||
| { name = "Sven Degroeve", email = "sven.degroeve@ugent.be" }, | ||
| { name = "Sven Degroeve" }, | ||
| { name = "Arthur Declercq", email = "arthur.declercq@ugent.be" }, | ||
@@ -41,4 +41,3 @@ { name = "Kevin Velghe", email = "kevin.velghe@ugent.be" }, | ||
| "pyteomics>=3.5,<5", | ||
| "tomlkit>=0.5,<1", | ||
| "sqlalchemy>=1.3,<2", | ||
| "sqlalchemy>=1.4,<3", | ||
| "click>=7,<9", | ||
@@ -95,7 +94,10 @@ "xgboost>=1.3,<3", | ||
| [tool.cibuildwheel] | ||
| build = "cp39*-manylinux_x86_64 cp39*-win_amd64 cp39*-macosx_x86_64 cp39*-macosx_arm64" | ||
| build = "cp3*-manylinux_x86_64 cp3*-win_amd64 cp3*-macosx_x86_64 cp3*-macosx_arm64" | ||
| skip = "cp36-* cp37-* cp38-*" | ||
| test-command = "ms2pip --help" | ||
| # Prevent building from source for packages with complex C/C++ dependencies | ||
| environment = { PIP_ONLY_BINARY = "pyarrow,pandas,numpy,lxml,xgboost" } | ||
| [tool.cibuildwheel.macos] | ||
| before-all = "brew install libomp" |
@@ -1,6 +0,12 @@ | ||
| from psm_utils import Peptidoform | ||
| import tempfile | ||
| from pathlib import Path | ||
| from ms2pip.spectrum_output import MSP, Bibliospec, DLIB | ||
| import numpy as np | ||
| import pytest | ||
| from psm_utils import PSM, Peptidoform | ||
| from ms2pip.result import ProcessingResult | ||
| from ms2pip.spectrum_output import DLIB, MSP, Bibliospec | ||
| class TestMSP: | ||
@@ -48,1 +54,224 @@ def test__format_modification_string(self): | ||
| assert DLIB._format_modified_sequence(Peptidoform(test_in)) == expected_out | ||
| def test_dlib_database_creation(self): | ||
| """Test that DLIB file creation works with SQLAlchemy (integration test).""" | ||
| # Create test data | ||
| pep = Peptidoform("ACDE/2") | ||
| psm = PSM( | ||
| peptidoform=pep, | ||
| spectrum_id=1, | ||
| retention_time=100.0, | ||
| protein_list=["PROT1", "PROT2"], | ||
| ) | ||
| result = ProcessingResult( | ||
| psm_index=0, | ||
| psm=psm, | ||
| theoretical_mz={ | ||
| "b": np.array([72.04435, 175.05354, 290.08047], dtype=np.float32), | ||
| "y": np.array([148.0604, 263.0873, 366.0965], dtype=np.float32), | ||
| }, | ||
| predicted_intensity={ | ||
| "b": np.array([0.1, 0.5, 0.3], dtype=np.float32), | ||
| "y": np.array([0.8, 0.6, 0.2], dtype=np.float32), | ||
| }, | ||
| observed_intensity=None, | ||
| correlation=None, | ||
| feature_vectors=None, | ||
| ) | ||
| # Write DLIB file | ||
| with tempfile.TemporaryDirectory() as tmpdir: | ||
| dlib_file = Path(tmpdir) / "test.dlib" | ||
| with DLIB(dlib_file) as writer: | ||
| writer.write([result]) | ||
| # Verify file was created | ||
| assert dlib_file.exists() | ||
| # Verify database structure and content using SQLAlchemy | ||
| from ms2pip._utils import dlib as dlib_module | ||
| connection = dlib_module.open_sqlite(dlib_file) | ||
| try: | ||
| # Test that metadata table exists and has version | ||
| from sqlalchemy import select | ||
| version = connection.execute( | ||
| select(dlib_module.Metadata.c.Value).where( | ||
| dlib_module.Metadata.c.Key == "version" | ||
| ) | ||
| ).scalar() | ||
| assert version == dlib_module.DLIB_VERSION | ||
| # Test that Entry table has data (select specific columns to avoid nullable CompressedArray) | ||
| from sqlalchemy import func | ||
| entry_count = connection.execute( | ||
| select(func.count()).select_from(dlib_module.Entry) | ||
| ).scalar() | ||
| assert entry_count == 1 | ||
| # Select specific non-nullable columns | ||
| entry = connection.execute( | ||
| select( | ||
| dlib_module.Entry.c.PeptideSeq, | ||
| dlib_module.Entry.c.PrecursorCharge, | ||
| dlib_module.Entry.c.RTInSeconds, | ||
| dlib_module.Entry.c.MassArray, | ||
| dlib_module.Entry.c.IntensityArray, | ||
| ) | ||
| ).fetchone() | ||
| assert entry.PeptideSeq == "ACDE" | ||
| assert entry.PrecursorCharge == 2 | ||
| assert entry.RTInSeconds == 100.0 | ||
| assert len(entry.MassArray) == 6 # 3 b-ions + 3 y-ions | ||
| assert len(entry.IntensityArray) == 6 | ||
| # Test that PeptideToProtein table has data | ||
| peptide_to_proteins = connection.execute( | ||
| select(dlib_module.PeptideToProtein) | ||
| ).fetchall() | ||
| assert len(peptide_to_proteins) == 2 | ||
| proteins = {p.ProteinAccession for p in peptide_to_proteins} | ||
| assert proteins == {"PROT1", "PROT2"} | ||
| assert all(p.PeptideSeq == "ACDE" for p in peptide_to_proteins) | ||
| finally: | ||
| connection.close() | ||
| def test_dlib_multiple_results(self): | ||
| """Test writing multiple ProcessingResults to DLIB file.""" | ||
| # Create multiple test results | ||
| results = [] | ||
| for i, seq in enumerate(["ACDE/2", "PEPTIDE/2", "TESTK/2"]): | ||
| pep = Peptidoform(seq) | ||
| psm = PSM( | ||
| peptidoform=pep, | ||
| spectrum_id=i, | ||
| retention_time=100.0 + i * 10, | ||
| protein_list=[f"PROT{i}"], | ||
| ) | ||
| result = ProcessingResult( | ||
| psm_index=i, | ||
| psm=psm, | ||
| theoretical_mz={ | ||
| "b": np.array([72.04435, 175.05354], dtype=np.float32), | ||
| "y": np.array([148.0604, 263.0873], dtype=np.float32), | ||
| }, | ||
| predicted_intensity={ | ||
| "b": np.array([0.1, 0.5], dtype=np.float32), | ||
| "y": np.array([0.8, 0.6], dtype=np.float32), | ||
| }, | ||
| observed_intensity=None, | ||
| correlation=None, | ||
| feature_vectors=None, | ||
| ) | ||
| results.append(result) | ||
| # Write DLIB file | ||
| with tempfile.TemporaryDirectory() as tmpdir: | ||
| dlib_file = Path(tmpdir) / "test_multiple.dlib" | ||
| with DLIB(dlib_file) as writer: | ||
| writer.write(results) | ||
| # Verify all entries were written | ||
| from sqlalchemy import select | ||
| from ms2pip._utils import dlib as dlib_module | ||
| connection = dlib_module.open_sqlite(dlib_file) | ||
| try: | ||
| # Select specific columns to avoid nullable CompressedArray | ||
| entries = connection.execute( | ||
| select( | ||
| dlib_module.Entry.c.PeptideSeq, | ||
| dlib_module.Entry.c.RTInSeconds, | ||
| ) | ||
| ).fetchall() | ||
| assert len(entries) == 3 | ||
| peptides = {e.PeptideSeq for e in entries} | ||
| assert peptides == {"ACDE", "PEPTIDE", "TESTK"} | ||
| # Verify retention times | ||
| rt_values = {e.RTInSeconds for e in entries} | ||
| assert rt_values == {100.0, 110.0, 120.0} | ||
| finally: | ||
| connection.close() | ||
| def test_dlib_sqlalchemy_select_syntax(self): | ||
| """Test that SQLAlchemy v2 select() syntax works correctly.""" | ||
| # This test specifically verifies the SQLAlchemy v2 compatibility changes | ||
| pep = Peptidoform("ACDE/2") | ||
| psm = PSM( | ||
| peptidoform=pep, | ||
| spectrum_id=1, | ||
| retention_time=100.0, | ||
| protein_list=["PROT1"], | ||
| ) | ||
| result = ProcessingResult( | ||
| psm_index=0, | ||
| psm=psm, | ||
| theoretical_mz={ | ||
| "b": np.array([72.04435], dtype=np.float32), | ||
| "y": np.array([148.0604], dtype=np.float32), | ||
| }, | ||
| predicted_intensity={ | ||
| "b": np.array([0.5], dtype=np.float32), | ||
| "y": np.array([0.8], dtype=np.float32), | ||
| }, | ||
| observed_intensity=None, | ||
| correlation=None, | ||
| feature_vectors=None, | ||
| ) | ||
| with tempfile.TemporaryDirectory() as tmpdir: | ||
| dlib_file = Path(tmpdir) / "test_sqlalchemy.dlib" | ||
| with DLIB(dlib_file) as writer: | ||
| writer.write([result]) | ||
| # Test the specific SQLAlchemy operations that were modified | ||
| from sqlalchemy import select | ||
| from ms2pip._utils import dlib as dlib_module | ||
| connection = dlib_module.open_sqlite(dlib_file) | ||
| try: | ||
| # Test select(Table) syntax (changed from Table.select()) | ||
| peptide_to_protein_results = connection.execute( | ||
| select(dlib_module.PeptideToProtein).where( | ||
| dlib_module.PeptideToProtein.c.ProteinAccession == "PROT1" | ||
| ) | ||
| ).fetchall() | ||
| assert len(peptide_to_protein_results) == 1 | ||
| assert peptide_to_protein_results[0].PeptideSeq == "ACDE" | ||
| # Test select(column) syntax (changed from select([column])) | ||
| version = connection.execute( | ||
| select(dlib_module.Metadata.c.Value).where( | ||
| dlib_module.Metadata.c.Key == "version" | ||
| ) | ||
| ).scalar() | ||
| assert version is not None | ||
| assert version == dlib_module.DLIB_VERSION | ||
| finally: | ||
| connection.close() | ||
| def test_dlib_missing_retention_time(self): | ||
| """Test that DLIB writing raises error when retention time is missing.""" | ||
| pep = Peptidoform("ACDE/2") | ||
| psm = PSM(peptidoform=pep, spectrum_id=1) # No retention_time | ||
| result = ProcessingResult( | ||
| psm_index=0, | ||
| psm=psm, | ||
| theoretical_mz={"b": np.array([72.04435], dtype=np.float32)}, | ||
| predicted_intensity={"b": np.array([0.5], dtype=np.float32)}, | ||
| observed_intensity=None, | ||
| correlation=None, | ||
| feature_vectors=None, | ||
| ) | ||
| with tempfile.TemporaryDirectory() as tmpdir: | ||
| dlib_file = Path(tmpdir) / "test_no_rt.dlib" | ||
| with pytest.raises(ValueError, match="Retention time required"): | ||
| with DLIB(dlib_file) as writer: | ||
| writer.write([result]) |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
35515142
0.03%4224
5.23%