scoring-matrices
Advanced tools
+3
-4
| Metadata-Version: 2.1 | ||
| Name: scoring-matrices | ||
| Version: 0.1.1 | ||
| Version: 0.2.0 | ||
| Summary: Dependency free, Cython-compatible scoring matrices to use with biological sequences. | ||
@@ -8,3 +8,3 @@ Home-page: https://github.com/althonos/score-matrices | ||
| Author-email: martin.larralde@embl.de | ||
| License: GPLv3+ | ||
| License: MIT | ||
| Project-URL: Bug Tracker, https://github.com/althonos/scoring-matrices/issues | ||
@@ -52,3 +52,2 @@ Project-URL: Changelog, https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md | ||
| [](https://github.com/althonos/scoring-matrices/) | ||
| [](https://git.embl.de/larralde/scoring-matrices/) | ||
| [](https://github.com/althonos/scoring-matrices/issues) | ||
@@ -66,3 +65,3 @@ [](https://scoring-matrices.readthedocs.io) | ||
| The `scoring-matrices` packages is a dependency-free, batteries included library | ||
| The `scoring-matrices` package is a dependency-free, batteries included library | ||
| to handle and distribute common substitution matrices: | ||
@@ -69,0 +68,0 @@ |
+1
-2
@@ -15,3 +15,2 @@ # 🧬🔠 `scoring-matrices` [](https://github.com/althonos/scoring-matrices/stargazers) | ||
| [](https://github.com/althonos/scoring-matrices/) | ||
| [](https://git.embl.de/larralde/scoring-matrices/) | ||
| [](https://github.com/althonos/scoring-matrices/issues) | ||
@@ -29,3 +28,3 @@ [](https://scoring-matrices.readthedocs.io) | ||
| The `scoring-matrices` packages is a dependency-free, batteries included library | ||
| The `scoring-matrices` package is a dependency-free, batteries included library | ||
| to handle and distribute common substitution matrices: | ||
@@ -32,0 +31,0 @@ |
| Metadata-Version: 2.1 | ||
| Name: scoring-matrices | ||
| Version: 0.1.1 | ||
| Version: 0.2.0 | ||
| Summary: Dependency free, Cython-compatible scoring matrices to use with biological sequences. | ||
@@ -8,3 +8,3 @@ Home-page: https://github.com/althonos/score-matrices | ||
| Author-email: martin.larralde@embl.de | ||
| License: GPLv3+ | ||
| License: MIT | ||
| Project-URL: Bug Tracker, https://github.com/althonos/scoring-matrices/issues | ||
@@ -52,3 +52,2 @@ Project-URL: Changelog, https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md | ||
| [](https://github.com/althonos/scoring-matrices/) | ||
| [](https://git.embl.de/larralde/scoring-matrices/) | ||
| [](https://github.com/althonos/scoring-matrices/issues) | ||
@@ -66,3 +65,3 @@ [](https://scoring-matrices.readthedocs.io) | ||
| The `scoring-matrices` packages is a dependency-free, batteries included library | ||
| The `scoring-matrices` package is a dependency-free, batteries included library | ||
| to handle and distribute common substitution matrices: | ||
@@ -69,0 +68,0 @@ |
@@ -1,2 +0,2 @@ | ||
| __version__ = "0.1.1" | ||
| __version__ = "0.2.0" | ||
| __author__ = "Martin Larralde <martin.larralde@embl.de>" | ||
@@ -3,0 +3,0 @@ __license__ = "MIT" |
@@ -14,9 +14,13 @@ # distutils: language = c | ||
| cdef float** _matrix | ||
| cdef char* _alphabet | ||
| cdef int _allocate(self, size_t length) except 1 nogil | ||
| cdef const float* data(self) except NULL nogil | ||
| cdef const float** matrix(self) except NULL nogil | ||
| cdef const float* data_ptr(self) except NULL nogil | ||
| cdef const float** matrix_ptr(self) except NULL nogil | ||
| cdef const char* alphabet_ptr(self) except NULL nogil | ||
| cdef size_t size(self) noexcept nogil | ||
| cpdef bint is_integer(self) | ||
| cpdef bint is_symmetric(self) | ||
| cpdef float min(self) | ||
@@ -23,0 +27,0 @@ cpdef float max(self) |
@@ -6,3 +6,2 @@ import typing | ||
| class ScoringMatrix: | ||
@@ -16,2 +15,18 @@ DEFAULT_ALPHABET: ClassVar[str] | ||
| def from_str(cls: Type[S], text: str, name: Optional[str] = None) -> S: ... | ||
# Alternate constructor: builds a matrix from per-row match scores placed
# on the diagonal, with a single score for every off-diagonal cell.
@classmethod
def from_diagonal(
    cls: Type[S],
    diagonal: Iterable[float],
    mismatch_score: float = 0.0,
    alphabet: str = DEFAULT_ALPHABET,
    name: Optional[str] = None,
) -> S: ...
# Alternate constructor: builds a matrix from one match score and one
# mismatch score. The `mismatch_score` default is -1.0 to agree with the
# default declared by the compiled implementation (the stub previously
# advertised -0.0, which misled type checkers and IDE tooltips).
@classmethod
def from_match_mismatch(
    cls: Type[S],
    match_score: float = 1.0,
    mismatch_score: float = -1.0,
    alphabet: str = DEFAULT_ALPHABET,
    name: Optional[str] = None,
) -> S: ...
| def __init__( | ||
@@ -38,4 +53,5 @@ self, | ||
| def is_integer(self) -> bool: ... | ||
| def is_symmetric(self) -> bool: ... | ||
| def min(self) -> float: ... | ||
| def max(self) -> float: ... | ||
| def shuffle(self: S, alphabet: str) -> S: ... | ||
| def shuffle(self: S, alphabet: str) -> S: ... |
+142
-16
| # distutils: language = c | ||
| # cython: language_level=3, linetrace=True, binding=True | ||
| """Dependency free, Cython-compatible scoring matrices for bioinformatics. | ||
| """ | ||
@@ -10,3 +12,3 @@ cimport cython | ||
| from libc.stdlib cimport realloc, free | ||
| from libc.string cimport memcpy | ||
| from libc.string cimport memcpy, memset | ||
@@ -103,2 +105,65 @@ from .matrices cimport _NAMES, _ALPHABETS, _SIZES, _MATRICES | ||
@classmethod
def from_diagonal(
    cls,
    object diagonal,
    float mismatch_score=0.0,
    str alphabet not None = DEFAULT_ALPHABET,
    str name = None
):
    """Build a scoring matrix from the scores on its diagonal.

    The ``i``-th value of ``diagonal`` becomes the score of cell
    ``(i, i)``; every other cell of the row receives ``mismatch_score``.
    Each row has as many columns as there are letters in ``alphabet``.

    Arguments:
        diagonal (sequence of `float`): The diagonal of the scoring
            matrix, used to score character matches.
        mismatch_score (`float`): The score to use for all
            mismatches.
        alphabet (`str`): The alphabet to use with the scoring matrix.
        name (`str` or `None`): A name for the scoring matrix, if any.

    Example:
        >>> matrix = ScoringMatrix.from_diagonal(
        ...     diagonal=[2, 2, 3, 3],
        ...     mismatch_score=-3.0,
        ...     alphabet="ATGC",
        ... )
        >>> for row in matrix:
        ...     print(row)
        [2.0, -3.0, -3.0, -3.0]
        [-3.0, 2.0, -3.0, -3.0]
        [-3.0, -3.0, 3.0, -3.0]
        [-3.0, -3.0, -3.0, 3.0]

    .. versionadded:: 0.2.0

    """
    cdef size_t width = len(alphabet)
    # one row per diagonal value; the constructor is handed the rows
    # as-is together with the alphabet and name
    cdef list rows = [
        [score if col == pos else mismatch_score for col in range(width)]
        for pos, score in enumerate(diagonal)
    ]
    return cls(rows, alphabet=alphabet, name=name)
@classmethod
def from_match_mismatch(
    cls,
    float match_score = 1.0,
    float mismatch_score = -1.0,
    str alphabet not None = DEFAULT_ALPHABET,
    str name = None,
):
    """Build a scoring matrix from a match score and a mismatch score.

    The result is a square matrix with one row and one column per
    letter of ``alphabet``: diagonal cells hold ``match_score`` and
    all other cells hold ``mismatch_score``.

    Arguments:
        match_score (`float`): The score placed on the diagonal.
        mismatch_score (`float`): The score placed everywhere else.
        alphabet (`str`): The alphabet to use with the scoring matrix.
        name (`str` or `None`): A name for the scoring matrix, if any.

    .. versionadded:: 0.2.0

    """
    cdef size_t width = len(alphabet)
    cdef list rows = [
        [match_score if col == pos else mismatch_score for col in range(width)]
        for pos in range(width)
    ]
    return cls(rows, alphabet=alphabet, name=name)
| # --- Magic methods -------------------------------------------------------- | ||
@@ -119,2 +184,27 @@ | ||
| """Create a new scoring matrix. | ||
| Arguments: | ||
| matrix (array-like of `float`): A square matrix with dimensions | ||
| equal to the ``alphabet`` length, storing the scores for each | ||
| pair of characters. | ||
| alphabet (`str`): The alphabet used to index the rows and columns | ||
| of the scoring matrix. | ||
| name (`str` or `None`): The name of the scoring matrix, if any. | ||
| Example: | ||
| >>> matrix = ScoringMatrix( | ||
| ... [[91, -114, -31, -123], | ||
| ... [-114, 100, -125, -31], | ||
| ... [-31, -125, 100, -114], | ||
| ... [-123, -31, -114, 91]], | ||
| ... alphabet="ACGT", | ||
| ... name="HOXD70", | ||
| ... ) | ||
| Raises: | ||
| `ValueError`: When the matrix is not a valid matrix or does not | ||
| match the given alphabet. | ||
| `MemoryError`: When memory for storing the scores could not be | ||
| allocated successfully. | ||
| """ | ||
@@ -137,4 +227,4 @@ cdef ssize_t i | ||
| assert self._data != NULL | ||
| assert self._matrix != NULL | ||
| for i, c in enumerate(self.alphabet): | ||
| self._alphabet[i] = ord(c) | ||
| for i, row in enumerate(matrix): | ||
@@ -255,7 +345,5 @@ if len(row) != size: | ||
| self._data = <float*> realloc(self._data, sizeof(float) * size * size) | ||
| if self._data is NULL: | ||
| raise MemoryError("Failed to allocate matrix") | ||
| self._matrix = <float**> realloc(self._matrix, sizeof(float*) * size) | ||
| if self._matrix is NULL: | ||
| self._alphabet = <char*> realloc(self._alphabet, sizeof(char) * (size + 1)) | ||
| if self._data is NULL or self._matrix is NULL or self._alphabet is NULL: | ||
| raise MemoryError("Failed to allocate matrix") | ||
@@ -267,2 +355,3 @@ | ||
| self._matrix[i] = &self._data[i * self._size] | ||
| memset(self._alphabet, 0, sizeof(char) * (size + 1)) | ||
@@ -273,3 +362,16 @@ return 0 | ||
| cdef const float* data(self) except NULL nogil: | ||
| cdef size_t size(self) noexcept nogil: | ||
| """Get the size of the scoring matrix. | ||
| """ | ||
| return self._size | ||
| cdef const char* alphabet_ptr(self) except NULL nogil: | ||
| """Get the alphabet of the scoring matrix as a C-string. | ||
| """ | ||
| if self._alphabet == NULL: | ||
| with gil: | ||
| raise RuntimeError("uninitialized scoring matrix") | ||
| return <const char*> self._alphabet | ||
| cdef const float* data_ptr(self) except NULL nogil: | ||
| """Get the matrix scores as a dense array. | ||
@@ -282,3 +384,3 @@ """ | ||
| cdef const float** matrix(self) except NULL nogil: | ||
| cdef const float** matrix_ptr(self) except NULL nogil: | ||
| """Get the matrix scores as an array of pointers. | ||
@@ -299,2 +401,5 @@ """ | ||
| Returns: | ||
| `bool`: `True` if the matrix only contains integer scores. | ||
| Example: | ||
@@ -309,11 +414,10 @@ >>> blosum62 = ScoringMatrix.from_name("BLOSUM62") | ||
| """ | ||
| assert self._data != NULL | ||
| cdef size_t i | ||
| cdef float x | ||
| cdef bint integer = True | ||
| cdef const float* _data = self.data_ptr() | ||
| cdef size_t i | ||
| cdef float x | ||
| cdef bint integer = True | ||
| with nogil: | ||
| for i in range(self._nitems): | ||
| x = self._data[i] | ||
| x = _data[i] | ||
| if lrintf(x) != x: | ||
@@ -324,2 +428,24 @@ integer = False | ||
cpdef bint is_symmetric(self):
    """Test whether the scoring matrix is symmetric.

    Only the cells above the diagonal are visited; each one is compared
    to its mirror below the diagonal, and the scan stops at the first
    pair that differs.

    Returns:
        `bool`: `True` if the matrix is a symmetric matrix.

    .. versionadded:: 0.2.0

    """
    cdef size_t        i
    cdef size_t        j
    cdef size_t        size      = self._size
    cdef bint          symmetric = True
    cdef const float** _matrix   = self.matrix_ptr()

    with nogil:
        # Rows and columns are bounded by the matrix dimension. The
        # `_nitems` counter is the total number of cells and must not be
        # used here: the row-pointer array only has `size` entries, so
        # indexing it up to `_nitems` reads past the allocation.
        for i in range(size):
            for j in range(i + 1, size):
                if _matrix[i][j] != _matrix[j][i]:
                    symmetric = False
                    break
            # leave the outer loop too once a mismatch has been found
            if not symmetric:
                break
    return symmetric
| cpdef float min(self): | ||
@@ -403,3 +529,3 @@ """Get the minimum score of the scoring matrix. | ||
| for letter in alphabet: | ||
| row = self[letter] | ||
| row = self[<str> letter] | ||
| matrix.append([row[j] for j in indices]) | ||
@@ -406,0 +532,0 @@ |
@@ -53,2 +53,29 @@ import pickle | ||
def test_from_diagonal(self):
    # Each case pairs a mismatch score with the rows expected for the
    # diagonal [1, 2, 3, 4] over the alphabet "ATGC".
    cases = [
        (
            0.0,
            [
                [1.0, 0.0, 0.0, 0.0],
                [0.0, 2.0, 0.0, 0.0],
                [0.0, 0.0, 3.0, 0.0],
                [0.0, 0.0, 0.0, 4.0],
            ],
        ),
        (
            -1.0,
            [
                [1.0, -1.0, -1.0, -1.0],
                [-1.0, 2.0, -1.0, -1.0],
                [-1.0, -1.0, 3.0, -1.0],
                [-1.0, -1.0, -1.0, 4.0],
            ],
        ),
    ]
    for mismatch, expected_rows in cases:
        m = ScoringMatrix.from_diagonal([1, 2, 3, 4], mismatch, alphabet="ATGC")
        for index, expected in enumerate(expected_rows):
            self.assertEqual(m[index], expected)
def test_from_diagonal_invalid_length(self):
    # A diagonal longer than the alphabet must be rejected.
    with self.assertRaises(ValueError):
        ScoringMatrix.from_diagonal([3, 3, 3, 3, 3, 3], alphabet="ATGC")
    # A diagonal shorter than the alphabet must be rejected as well.
    with self.assertRaises(ValueError):
        ScoringMatrix.from_diagonal([3, 3, 3], alphabet="ATGC")
| def test_list(self): | ||
@@ -72,2 +99,7 @@ aa = ScoringMatrix.from_name("BLOSUM50") | ||
def test_init_empty(self):
    # An empty matrix over an empty alphabet has length zero and is falsy.
    empty = ScoringMatrix([], alphabet="")
    self.assertEqual(len(empty), 0)
    self.assertFalse(bool(empty))
| def test_init_invalid_length(self): | ||
@@ -107,2 +139,12 @@ with self.assertRaises(ValueError): | ||
| self.assertEqual(sm1.alphabet, sm2.alphabet) | ||
| self.assertEqual(list(sm1), list(sm2)) | ||
| self.assertEqual(list(sm1), list(sm2)) | ||
def test_shuffle_invalid_alphabet(self):
    # Shuffling BLOSUM62 to "ARNJOU" is expected to fail with a KeyError.
    blosum62 = ScoringMatrix.from_name("BLOSUM62")
    with self.assertRaises(KeyError):
        blosum62.shuffle("ARNJOU")
def test_shuffle_empty(self):
    # Shuffling to an empty alphabet yields an empty, falsy matrix.
    blosum62 = ScoringMatrix.from_name("BLOSUM62")
    shuffled = blosum62.shuffle("")
    self.assertEqual(len(shuffled), 0)
    self.assertFalse(bool(shuffled))
+1
-1
@@ -10,3 +10,3 @@ [metadata] | ||
| long_description_content_type = text/markdown | ||
| license = GPLv3+ | ||
| license = MIT | ||
| platform = any | ||
@@ -13,0 +13,0 @@ keywords = bioinformatics, sequence, substitution, matrix, score |
+6
-1
@@ -138,3 +138,8 @@ import configparser | ||
| nitems = len(matrix) * len(matrix) | ||
| dst.write(f"float _MATRIX_{id_}[{nitems}] = {{ { ', '.join(map(repr, itertools.chain.from_iterable(matrix))) } }};\n") | ||
| dst.write(f"float _MATRIX_{id_}[{nitems}] = {{") | ||
| for i, item in enumerate(itertools.chain.from_iterable(matrix)): | ||
| if i != 0: | ||
| dst.write(", ") | ||
| dst.write(f"{item!r}F") | ||
| dst.write("};\n") | ||
@@ -141,0 +146,0 @@ dst.write(f"const float* _MATRICES[{len(names) + 1}] = {{") |
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
364888
15.62%541
12.24%