Latest Threat Research:SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains.Details
Socket
Book a DemoInstallSign in
Socket

scoring-matrices

Package Overview
Dependencies
Maintainers
1
Versions
11
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

scoring-matrices - npm Package Compare versions

Comparing version
0.2.2
to
0.3.0rc1
+331
.github/workflows/package.yml
name: Package
on:
push:
tags:
- v*
jobs:
wheel-linux-aarch64:
  # One job per interpreter tag: QEMU is set up first, then cibuildwheel
  # builds and tests the AArch64 manylinux wheel and the result is uploaded.
  name: Build Linux wheels (Aarch64)
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      python-tag:
        - cp37-manylinux_aarch64
        - cp38-manylinux_aarch64
        - cp39-manylinux_aarch64
        - cp310-manylinux_aarch64
        - cp311-manylinux_aarch64
        - cp312-manylinux_aarch64
        - pp37-manylinux_aarch64
        - pp38-manylinux_aarch64
        - pp39-manylinux_aarch64
        - pp310-manylinux_aarch64
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
    - name: Set up QEMU
      id: qemu
      uses: docker/setup-qemu-action@v2
      with:
        platforms: all
    - name: Build manylinux wheels
      uses: pypa/cibuildwheel@v2.16.5
      with:
        output-dir: dist
      env:
        # Restrict the build to the single tag selected by the matrix.
        CIBW_ARCHS: aarch64
        CIBW_BUILD: ${{ matrix.python-tag }}
        CIBW_BUILD_VERBOSITY: 2
        CIBW_BEFORE_BUILD: pip install cython
        # The built wheel is tested with the bundled unittest suite.
        CIBW_TEST_REQUIRES: importlib-resources
        CIBW_TEST_COMMAND: python -m unittest scoring_matrices.tests -vv
    - uses: actions/upload-artifact@v4
      with:
        # Artifact names are unique per tag so the upload job can merge them.
        name: wheels-${{ matrix.python-tag }}
        path: dist/*
wheel-linux-x86_64:
  # Builds and tests one x86-64 manylinux wheel per interpreter tag with
  # cibuildwheel, then uploads it as a `wheels-<tag>` artifact.
  name: Build Linux wheels (x86-64)
  # The `ubuntu-20.04` hosted image has been retired; use the same image
  # label as the AArch64 Linux wheel job.
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      python-tag:
        - cp37-manylinux_x86_64
        - cp38-manylinux_x86_64
        - cp39-manylinux_x86_64
        - cp310-manylinux_x86_64
        - cp311-manylinux_x86_64
        - cp312-manylinux_x86_64
        - pp37-manylinux_x86_64
        - pp38-manylinux_x86_64
        - pp39-manylinux_x86_64
        - pp310-manylinux_x86_64
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
    # NOTE(review): a Rust toolchain is installed although the visible build
    # files only declare C/Cython sources - confirm whether it is needed.
    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable
        override: true
    - name: Build manylinux wheels
      uses: pypa/cibuildwheel@v2.16.5
      env:
        CIBW_ARCHS: x86_64
        CIBW_BUILD: ${{ matrix.python-tag }}
        CIBW_BEFORE_BUILD: pip install cython
        CIBW_BUILD_VERBOSITY: 2
        CIBW_TEST_COMMAND: python -m unittest scoring_matrices.tests -vv
        CIBW_TEST_REQUIRES: importlib-resources
      with:
        output-dir: dist
    - uses: actions/upload-artifact@v4
      with:
        name: wheels-${{ matrix.python-tag }}
        path: dist/*
wheel-macos-x86_64:
  # Builds and tests one x86-64 macOS wheel per interpreter tag with
  # cibuildwheel, then uploads it as a `wheels-<tag>` artifact.
  name: Build MacOS wheels (x86-64)
  # NOTE(review): the `macOS-12` hosted image has been retired by GitHub;
  # pick and validate a replacement image before the next release.
  runs-on: macOS-12
  strategy:
    matrix:
      python-tag:
        - cp37-macosx_x86_64
        - cp38-macosx_x86_64
        - cp39-macosx_x86_64
        - cp310-macosx_x86_64
        - cp311-macosx_x86_64
        - cp312-macosx_x86_64
        - pp37-macosx_x86_64
        - pp38-macosx_x86_64
        - pp39-macosx_x86_64
        - pp310-macosx_x86_64
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
    # NOTE(review): a Rust toolchain is installed although the visible build
    # files only declare C/Cython sources - confirm whether it is needed.
    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable
        override: true
    # Step label fixed: this job produces macOS wheels, not manylinux ones.
    - name: Build MacOS wheels
      uses: pypa/cibuildwheel@v2.16.5
      env:
        CIBW_ARCHS: x86_64
        CIBW_BUILD: ${{ matrix.python-tag }}
        CIBW_BEFORE_BUILD: pip install cython
        CIBW_BUILD_VERBOSITY: 2
        CIBW_TEST_COMMAND: python -m unittest scoring_matrices.tests -vv
        CIBW_TEST_REQUIRES: importlib-resources
        CIBW_ENVIRONMENT_MACOS: MACOSX_DEPLOYMENT_TARGET=10.12
      with:
        output-dir: dist
    - uses: actions/upload-artifact@v4
      with:
        name: wheels-${{ matrix.python-tag }}
        path: dist/*
wheel-macos-aarch64:
  # Builds one arm64 macOS wheel per CPython tag with cibuildwheel, then
  # uploads it as a `wheels-<tag>` artifact.
  name: Build MacOS wheels (Aarch64)
  # NOTE(review): the `macOS-12` hosted image has been retired by GitHub;
  # pick and validate a replacement image before the next release.
  runs-on: macOS-12
  strategy:
    matrix:
      python-tag:
        - cp38-macosx_arm64
        - cp39-macosx_arm64
        - cp310-macosx_arm64
        - cp311-macosx_arm64
        - cp312-macosx_arm64
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
    # NOTE(review): a Rust toolchain is installed although the visible build
    # files only declare C/Cython sources - confirm whether it is needed.
    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable
        override: true
        target: aarch64-apple-darwin
    # Step label fixed: this job produces macOS wheels, not manylinux ones.
    - name: Build MacOS wheels
      uses: pypa/cibuildwheel@v2.16.5
      env:
        CIBW_ARCHS: arm64
        CIBW_BUILD: ${{ matrix.python-tag }}
        CIBW_BEFORE_BUILD: pip install cython
        CIBW_BUILD_VERBOSITY: 2
        CIBW_TEST_COMMAND: python -m unittest scoring_matrices.tests -vv
        CIBW_TEST_REQUIRES: importlib-resources
      with:
        output-dir: dist
    - uses: actions/upload-artifact@v4
      with:
        name: wheels-${{ matrix.python-tag }}
        path: dist/*
wheel-win32-x86_64:
  # Builds and tests one AMD64 Windows wheel per interpreter tag with
  # cibuildwheel, then uploads it as a `wheels-<tag>` artifact.
  name: Build Windows wheels (x86-64)
  # NOTE(review): the `windows-2019` hosted image has been retired by GitHub;
  # pick and validate a replacement image before the next release.
  runs-on: windows-2019
  strategy:
    matrix:
      python-tag:
        - cp37-win_amd64
        - cp38-win_amd64
        - cp39-win_amd64
        - cp310-win_amd64
        - cp311-win_amd64
        - cp312-win_amd64
        - pp37-win_amd64
        - pp38-win_amd64
        - pp39-win_amd64
        - pp310-win_amd64
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
    # NOTE(review): a Rust toolchain is installed although the visible build
    # files only declare C/Cython sources - confirm whether it is needed.
    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable
        override: true
    # Step label fixed: this job produces Windows wheels, not manylinux ones.
    - name: Build Windows wheels
      uses: pypa/cibuildwheel@v2.16.5
      env:
        CIBW_ARCHS: AMD64
        CIBW_BUILD: ${{ matrix.python-tag }}
        CIBW_BEFORE_BUILD: pip install cython
        CIBW_BUILD_VERBOSITY: 2
        CIBW_TEST_COMMAND: python -m unittest scoring_matrices.tests -vv
        CIBW_TEST_REQUIRES: importlib-resources
      with:
        output-dir: dist
    - uses: actions/upload-artifact@v4
      with:
        name: wheels-${{ matrix.python-tag }}
        path: dist/*
sdist:
  # Builds the source distribution with `python -m build -s` and stores it
  # as the `sdist` artifact consumed by the later jobs.
  runs-on: ubuntu-latest
  name: Build source distribution
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
    - name: Set up Python 3.11
      # Same major version of the action as the test workflow uses.
      uses: actions/setup-python@v5
      with:
        # Quoted so the version is always read as a string, never a float.
        python-version: '3.11'
    - name: Install build requirements
      run: python -m pip install -r .github/workflows/requirements.txt
    - name: Build source distribution
      run: python -m build -s
    - name: Store built source distribution
      uses: actions/upload-artifact@v4
      with:
        name: sdist
        path: dist/*
test-sdist:
  # Installs the freshly built source distribution from the `sdist` artifact
  # and runs the unit tests against the installed package.
  runs-on: ubuntu-latest
  name: Test source distribution
  needs:
    - sdist
  steps:
    - name: Setup Python 3.11
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Download source distribution
      uses: actions/download-artifact@v4
      with:
        name: sdist
        path: dist/
    - name: Update pip to latest version
      run: python -m pip install -U pip setuptools wheel
    - name: Install source distribution
      # Install the downloaded tarball by path. Resolving the project by name
      # with `--find-links` lets pip prefer the newest stable release from the
      # index over a pre-release tarball (e.g. an `rc` tag), which would test
      # the wrong code.
      run: python -m pip install --no-binary scoring-matrices dist/*.tar.gz
    - name: Run tests without coverage
      run: python -m unittest scoring_matrices.tests -vv
upload:
  # Gathers the sdist and every `wheels-*` artifact into `dist/` and hands
  # them to the PyPI publish action; runs only after all build jobs and the
  # sdist test have finished.
  name: Upload
  runs-on: ubuntu-latest
  environment: PyPI
  permissions:
    id-token: write
  needs:
    - sdist
    - test-sdist
    - wheel-linux-aarch64
    - wheel-linux-x86_64
    - wheel-macos-aarch64
    - wheel-macos-x86_64
    - wheel-win32-x86_64
  steps:
    - name: Download source distribution
      uses: actions/download-artifact@v4
      with:
        name: sdist
        path: dist/
        merge-multiple: true
    - name: Download wheel distributions
      uses: actions/download-artifact@v4
      with:
        # All per-tag wheel artifacts are flattened into the same folder.
        pattern: wheels-*
        path: dist/
        merge-multiple: true
    - uses: pypa/gh-action-pypi-publish@release/v1
      if: startsWith(github.ref, 'refs/tags')
release:
  # Creates the GitHub release from the changelog once the upload job has
  # succeeded; skipped when the ref contains `rc`.
  environment: GitHub Releases
  runs-on: ubuntu-latest
  if: "!contains(github.ref, 'rc')"
  name: Release
  needs: upload
  # Least privilege: only repository contents (releases/tags) are written.
  # NOTE(review): narrowed from `write-all` - confirm the release action
  # needs no further scopes.
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
    - uses: rasmus-saks/release-a-changelog-action@v1.2.0
      with:
        github-token: '${{ secrets.GITHUB_TOKEN }}'
aur:
  # Regenerates pkg/aur/PKGBUILD from its template using the released version
  # and the SHA256 of the source distribution, then pushes it to the AUR.
  # Skipped when the ref contains `rc`.
  environment: Arch User Repository
  runs-on: ubuntu-latest
  if: "!contains(github.ref, 'rc')"
  name: Update AUR package
  needs:
    - sdist
    - upload
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Download source distribution
      # Must match the major version of the upload action (v4): older
      # download actions cannot retrieve artifacts uploaded with v4.
      uses: actions/download-artifact@v4
      with:
        name: sdist
        path: dist
    - name: Extract package version
      # `github.ref` is the full ref (`refs/tags/v1.2.3`); its slashes would
      # corrupt the `s/.../.../g` sed expression below. Use the short tag
      # name and strip the leading `v` of the `v*` tag pattern instead.
      run: echo "PKGVER=${GITHUB_REF_NAME#v}" >> "$GITHUB_ENV"
    - name: Compute SHA256 of source distribution
      # The `sdist` artifact holds the single tarball built by the sdist job,
      # so a glob avoids depending on the exact file name normalisation.
      run: echo "SHA256=$(sha256sum dist/*.tar.gz | cut -f1 -d' ')" >> "$GITHUB_ENV"
    - name: Generate PKGBUILD
      # Values written to GITHUB_ENV by the previous steps are read as plain
      # shell variables here.
      run: sed -e "s/%pkgver/${PKGVER}/g" -e "s/%sha256sum/${SHA256}/g" pkg/aur/PKGBUILD.in > pkg/aur/PKGBUILD
    - name: Update package
      uses: KSXGitHub/github-actions-deploy-aur@v2.7.2
      with:
        pkgname: python-scoring-matrices
        pkgbuild: pkg/aur/PKGBUILD
        commit_username: ${{ secrets.AUR_USER }}
        commit_email: ${{ secrets.AUR_EMAIL }}
        ssh_private_key: ${{ secrets.AUR_SSH_PRIVATE_KEY }}
        commit_message: ${{ github.event.head_commit.message }}
coverage ~=7.0
cython ~=3.0
importlib-resources ; python_version < '3.7'
scikit-build-core
build
name: Test
on:
- push
- pull_request
jobs:
test_linux:
  # Builds the extension in editable mode and runs the unit tests under
  # coverage for every listed interpreter; CPython results go to Codecov.
  name: Test (Linux)
  runs-on: ubuntu-22.04
  env:
    OS: Linux
  strategy:
    fail-fast: false
    matrix:
      include:
        # Versions are quoted uniformly so none can be parsed as a float
        # (an unquoted 3.10 would become 3.1).
        - python-version: '3.7'
          python-release: v3.7
          python-impl: CPython
        - python-version: '3.8'
          python-release: v3.8
          python-impl: CPython
        - python-version: '3.9'
          python-release: v3.9
          python-impl: CPython
        - python-version: '3.10'
          python-release: v3.10
          python-impl: CPython
        - python-version: '3.11'
          python-release: v3.11
          python-impl: CPython
        - python-version: '3.12'
          python-release: v3.12
          python-impl: CPython
        - python-version: pypy-3.7
          python-release: v3.7
          python-impl: PyPy
        - python-version: pypy-3.8
          python-release: v3.8
          python-impl: PyPy
        - python-version: pypy-3.9
          python-release: v3.9
          python-impl: PyPy
        - python-version: pypy-3.10
          python-release: v3.10
          python-impl: PyPy
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: true
    - name: Setup Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Update pip
      run: python -m pip install -U pip wheel setuptools
    - name: Install Python requirements
      run: python -m pip install -r .github/workflows/requirements.txt
    - name: Build C extension in debug mode
      run: python -m pip install -v --no-build-isolation -e .
    - name: Test with coverage
      run: python -m coverage run -m unittest scoring_matrices.tests -vv
    - name: Upload to Codecov
      # Coverage is only reported for CPython runs.
      if: matrix.python-impl == 'CPython'
      uses: codecov/codecov-action@v4
      with:
        flags: ${{ matrix.python-impl }},${{ matrix.python-release }},${{ env.OS }}
        name: test-python-${{ matrix.python-version }}
        fail_ci_if_error: false
        token: ${{ secrets.CODECOV_TOKEN }}
test_osx:
  # Builds the extension in editable mode and runs the unit tests under
  # coverage for every listed interpreter; CPython results go to Codecov.
  name: Test (OSX)
  # NOTE(review): the `macOS-12` hosted image has been retired by GitHub;
  # pick and validate a replacement image.
  runs-on: macOS-12
  env:
    OS: OSX
  strategy:
    fail-fast: false
    matrix:
      include:
        # Versions are quoted uniformly so none can be parsed as a float
        # (an unquoted 3.10 would become 3.1).
        - python-version: '3.7'
          python-release: v3.7
          python-impl: CPython
        - python-version: '3.8'
          python-release: v3.8
          python-impl: CPython
        - python-version: '3.9'
          python-release: v3.9
          python-impl: CPython
        - python-version: '3.10'
          python-release: v3.10
          python-impl: CPython
        - python-version: '3.11'
          python-release: v3.11
          python-impl: CPython
        - python-version: '3.12'
          python-release: v3.12
          python-impl: CPython
        - python-version: pypy-3.7
          python-release: v3.7
          python-impl: PyPy
        - python-version: pypy-3.8
          python-release: v3.8
          python-impl: PyPy
        - python-version: pypy-3.9
          python-release: v3.9
          python-impl: PyPy
        - python-version: pypy-3.10
          python-release: v3.10
          python-impl: PyPy
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: true
    - name: Setup Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Update pip
      run: python -m pip install -U pip wheel setuptools
    - name: Install Python requirements
      run: python -m pip install -r .github/workflows/requirements.txt
    - name: Build C extension in debug mode
      run: python -m pip install -v -e . --no-build-isolation
    - name: Test with coverage
      run: python -m coverage run -m unittest scoring_matrices.tests -vv
    - name: Upload to Codecov
      # Coverage is only reported for CPython runs.
      if: matrix.python-impl == 'CPython'
      uses: codecov/codecov-action@v4
      with:
        flags: ${{ matrix.python-impl }},${{ matrix.python-release }},${{ env.OS }}
        name: test-python-${{ matrix.python-version }}
        fail_ci_if_error: false
        token: ${{ secrets.CODECOV_TOKEN }}
test_windows:
  # Builds and installs the extension, then runs the unit tests (without
  # coverage) for every listed x64 interpreter.
  name: Test (Windows)
  runs-on: windows-latest
  env:
    OS: Windows
  strategy:
    fail-fast: false
    matrix:
      include:
        # Versions are quoted uniformly so none can be parsed as a float
        # (an unquoted 3.10 would become 3.1).
        - python-version: '3.7'
          python-release: v3.7
          python-impl: CPython
        - python-version: '3.8'
          python-release: v3.8
          python-impl: CPython
        - python-version: '3.9'
          python-release: v3.9
          python-impl: CPython
        - python-version: '3.10'
          python-release: v3.10
          python-impl: CPython
        - python-version: '3.11'
          python-release: v3.11
          python-impl: CPython
        - python-version: '3.12'
          python-release: v3.12
          python-impl: CPython
        - python-version: pypy-3.7
          python-release: v3.7
          python-impl: PyPy
        - python-version: pypy-3.8
          python-release: v3.8
          python-impl: PyPy
        - python-version: pypy-3.9
          python-release: v3.9
          python-impl: PyPy
        - python-version: pypy-3.10
          python-release: v3.10
          python-impl: PyPy
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: true
    - name: Setup Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
        architecture: x64
    - name: Update pip
      run: python -m pip install -U pip wheel setuptools
    - name: Install Python requirements
      run: python -m pip install -r .github/workflows/requirements.txt
    - name: Build C extension
      run: python -m pip install -v . --no-build-isolation
    - name: Test without coverage
      run: python -m unittest scoring_matrices.tests -vv
# Created by https://www.gitignore.io/api/python,c
# Edit at https://www.gitignore.io/?templates=python,c
### C ###
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf
# Valgrind files
callgrind.out.*
# Memprofile
mprofile_*.dat
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# End of https://www.gitignore.io/api/python,c
# Read the Docs build configuration (Sphinx project).
# Reference: https://docs.readthedocs.io/en/stable/config-file/v2.html

# Configuration schema version (mandatory).
version: 2

# Build environment: operating system image and tool versions.
build:
  os: ubuntu-22.04
  tools:
    python: "3.12"

# Check out every VCS submodule before building.
submodules:
  include: all

# Sphinx is driven by the configuration stored under "docs/".
sphinx:
  configuration: docs/conf.py

# Python requirements for the documentation build; see
# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
  install:
    - requirements: docs/requirements.txt
    - method: pip
      path: .
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
[Unreleased]: https://github.com/althonos/scoring-matrices/compare/v0.3.0...HEAD
## [v0.3.0] - 2024-10-20
[v0.3.0]: https://github.com/althonos/scoring-matrices/compare/v0.2.2...v0.3.0
### Changed
- Rewrite package build using `scikit-build-core`.
- Use the PyData theme in documentation.
### Removed
- Support for Python 3.6.
## [v0.2.2] - 2024-06-24
[v0.2.2]: https://github.com/althonos/scoring-matrices/compare/v0.2.1...v0.2.2
### Fixed
- Segmentation fault due to out-of-bounds access in `ScoringMatrix.is_symmetric` ([#2](https://github.com/althonos/scoring-matrices/issues/2)).
## [v0.2.1] - 2024-06-06
[v0.2.1]: https://github.com/althonos/scoring-matrices/compare/v0.2.0...v0.2.1
### Fixed
- Missing type hints of `name` and `alphabet` attributes of `ScoringMatrix` ([#1](https://github.com/althonos/scoring-matrices/pull/1), by [@RayHackett](https://github.com/RayHackett)).
## [v0.2.0] - 2024-05-06
[v0.2.0]: https://github.com/althonos/scoring-matrices/compare/v0.1.1...v0.2.0
### Added
- `ScoringMatrix.is_symmetric` method to check whether the matrix is symmetric.
- `ScoringMatrix.from_match_mismatch` and `ScoringMatrix.from_diagonal` constructors.
### Fixed
- Rounding of constants in generated `matrices.h` header.
## [v0.1.1] - 2024-05-03
[v0.1.1]: https://github.com/althonos/scoring-matrices/compare/v0.1.0...v0.1.1
### Fixed
- Compilation of Python Limited API wheels.
## [v0.1.0] - 2024-05-03
[v0.1.0]: https://github.com/althonos/scoring-matrices/compare/de079cc0...v0.1.0
Initial release.
cmake_minimum_required(VERSION 3.20)
project(
    ${SKBUILD_PROJECT_NAME}
    VERSION ${SKBUILD_PROJECT_VERSION}
    LANGUAGES C
)

# Static libraries only, compiled as position-independent code.
set(BUILD_SHARED_LIBS OFF)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# --- Cython extension support -------------------------------------------------

# Record the `src` folder as the extensions source root, then load the helper
# module shipped with the project.
set_property(
    GLOBAL PROPERTY
    PYTHON_EXTENSIONS_SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/src
)
include("src/scripts/cmake/CythonExtension.cmake")

# --- Include folders (currently disabled) -------------------------------------

# set(CYTHON_HEADERS_DIR ${CMAKE_CURRENT_LIST_DIR}/src/scoring_matrices)

# --- Sub-projects -------------------------------------------------------------

# `data` is added before `src`.
add_subdirectory(data)
add_subdirectory(src)
# Contributing to `scoring-matrices`
For bug fixes or new features, please file an issue before submitting a
pull request. If the change isn't trivial, it may be best to wait for
feedback.
## Setting up a local repository
You can just clone the GitHub repository to a local folder and start working
from there:
```console
$ git clone https://github.com/althonos/scoring-matrices
```
## Running tests
Tests are written as usual Python unit tests with the `unittest` module of
the standard library. Running them requires the extension to be built
locally:
```console
$ python -m pip install -v -e . --no-build-isolation
$ python -m unittest scoring_matrices.tests -vv
```
## Coding guidelines
This project targets Python 3.7 or later.
Python objects should be typed; since type annotations are not supported by
Cython, you must manually declare types in type stubs (`.pyi` files). In Python
files, you can add type annotations to function signatures (supported since
Python 3.5) and to variable assignments (supported only from Python
3.6 onward).
### Interfacing with C/C++
When interfacing with C or C++, and in particular with pointers, use
assertions everywhere you assume the pointer to be non-NULL. Also consider
using assertions when accessing raw C arrays, if applicable.

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

# Collect every matrix file of this folder; CONFIGURE_DEPENDS re-runs the glob
# at build time so that added or removed files are picked up.
file(GLOB MATRICES CONFIGURE_DEPENDS "*.mat")

# Publish the list as a global property. The previous `set(GLOBAL PROPERTY ...)`
# call only created a normal variable named `GLOBAL`.
set_property(GLOBAL PROPERTY MATRICES ${MATRICES})

# Generate `matrices.h` from the matrix files with the project script.
add_custom_command(
    OUTPUT matrices.h
    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/src/scripts/generate_matrices.py --output matrices.h ${MATRICES}
    DEPENDS ${MATRICES}
)

# An empty stub source is written so that the library has a C file next to
# the generated header.
file(CONFIGURE OUTPUT stub.c CONTENT "")
add_library(matrices matrices.h ${CMAKE_CURRENT_BINARY_DIR}/stub.c)
set_target_properties(matrices PROPERTIES LINKER_LANGUAGE C)

# Consumers of `matrices` get the binary dir (where the header is generated)
# on their include path.
target_include_directories(matrices INTERFACE ${CMAKE_CURRENT_BINARY_DIR})
# install(FILES ${CMAKE_CURRENT_BINARY_DIR}/matrices.h DESTINATION ${SKBUILD_PLATLIB_DIR}/scoring_matrices)

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

/*******************************************************************************
 * Set a custom icon for pypi as it's not available in the fa built-in brands
 *
 * The icon definition is passed directly to `FontAwesome.library.add`; it is
 * no longer assigned to an undeclared `faListOldStyle` variable, which leaked
 * an implicit global and would throw in strict mode.
 */
FontAwesome.library.add({
  prefix: "fa-custom",
  iconName: "pypi",
  icon: [
    17.313, // viewBox width
    19.807, // viewBox height
    [], // ligature
    "e001", // unicode codepoint - private use area
    "m10.383 0.2-3.239 1.1769 3.1883 1.1614 3.239-1.1798zm-3.4152 1.2411-3.2362 1.1769 3.1855 1.1614 3.2369-1.1769zm6.7177 0.00281-3.2947 1.2009v3.8254l3.2947-1.1988zm-3.4145 1.2439-3.2926 1.1981v3.8254l0.17548-0.064132 3.1171-1.1347zm-6.6564 0.018325v3.8247l3.244 1.1805v-3.8254zm10.191 0.20931v2.3137l3.1777-1.1558zm3.2947 1.2425-3.2947 1.1988v3.8254l3.2947-1.1988zm-8.7058 0.45739c0.00929-1.931e-4 0.018327-2.977e-4 0.027485 0 0.25633 0.00851 0.4263 0.20713 0.42638 0.49826 1.953e-4 0.38532-0.29327 0.80469-0.65542 0.93662-0.36226 0.13215-0.65608-0.073306-0.65613-0.4588-6.28e-5 -0.38556 0.2938-0.80504 0.65613-0.93662 0.068422-0.024919 0.13655-0.038114 0.20156-0.039466zm5.2913 0.78369-3.2947 1.1988v3.8247l3.2947-1.1981zm-10.132 1.239-3.2362 1.1769 3.1883 1.1614 3.2362-1.1769zm6.7177 0.00213-3.2926 1.2016v3.8247l3.2926-1.2009zm-3.4124 1.2439-3.2947 1.1988v3.8254l3.2947-1.1988zm-6.6585 0.016195v3.8275l3.244 1.1805v-3.8254zm16.9 0.21143-3.2947 1.1988v3.8247l3.2947-1.1981zm-3.4145 1.2411-3.2926 1.2016v3.8247l3.2926-1.2009zm-3.4145 1.2411-3.2926 1.2016v3.8247l3.2926-1.2009zm-3.4124 1.2432-3.2947 1.1988v3.8254l3.2947-1.1988zm-6.6585 0.019027v3.8247l3.244 1.1805v-3.8254zm13.485 1.4497-3.2947 1.1988v3.8247l3.2947-1.1981zm-3.4145 1.2411-3.2926 1.2016v3.8247l3.2926-1.2009zm2.4018 0.38127c0.0093-1.83e-4 0.01833-3.16e-4 0.02749 0 0.25633 0.0085 0.4263 0.20713 0.42638 0.49826 1.97e-4 0.38532-0.29327 0.80469-0.65542 0.93662-0.36188 0.1316-0.65525-0.07375-0.65542-0.4588-1.95e-4 -0.38532 0.29328-0.80469 0.65542-0.93662 0.06842-0.02494 0.13655-0.03819 0.20156-0.03947zm-5.8142 0.86403-3.244 1.1805v1.4201l3.244 1.1805z", // svg path (https://simpleicons.org/icons/pypi.svg)
  ],
});
// Register the custom "sword" icon under the `fa-custom` prefix. The
// definition is passed directly instead of being assigned to an undeclared
// `faListOldStyle` variable (implicit global, throws in strict mode).
FontAwesome.library.add({
  prefix: "fa-custom",
  iconName: "sword",
  icon: [
    256, // viewBox width
    256, // viewBox height
    [], // ligature
    "e002", // unicode codepoint - private use area
    "M221.65723,34.34326A8.00246,8.00246,0,0,0,216,32h-.02539l-63.79883.20117A8.00073,8.00073,0,0,0,146.0332,35.106L75.637,120.32275,67.31348,111.999A16.02162,16.02162,0,0,0,44.68555,112L32.001,124.68555A15.99888,15.99888,0,0,0,32,147.31348l20.88672,20.88769L22.94531,198.14258a16.01777,16.01777,0,0,0,.001,22.62695l12.28418,12.28418a16.00007,16.00007,0,0,0,22.62793,0L87.79883,203.1123,108.68652,224.001A16.02251,16.02251,0,0,0,131.31445,224L143.999,211.31445A15.99888,15.99888,0,0,0,144,188.68652l-8.32324-8.32324,85.21679-70.39648a8.00125,8.00125,0,0,0,2.90528-6.14258L224,40.02539A8.001,8.001,0,0,0,221.65723,34.34326Zm-13.84668,65.67822-83.49829,68.97706L111.314,156l54.34327-54.34277a8.00053,8.00053,0,0,0-11.31446-11.31446L100,144.686,87.00195,131.6875,155.97852,48.189l51.99609-.16357Z", // svg path of the sword icon (the earlier pypi.svg attribution was a copy-paste; TODO record the real source)
  ],
});
// Register the custom "knife" icon under the `fa-custom` prefix. The
// definition is passed directly instead of being assigned to an undeclared
// `faListOldStyle` variable (implicit global, throws in strict mode).
FontAwesome.library.add({
  prefix: "fa-custom",
  iconName: "knife",
  icon: [
    256, // viewBox width
    256, // viewBox height
    [], // ligature
    "e003", // unicode codepoint - private use area
    "M231.79883,32.2002a28.05536,28.05536,0,0,0-39.667.06933L18.27441,210.41211a8,8,0,0,0,3.92676,13.38281,155.06019,155.06019,0,0,0,34.957,4.00293c33.4209-.001,66.877-10.86914,98.32813-32.1748,31.74512-21.50391,50.14551-45.79981,50.91406-46.82325a8.00114,8.00114,0,0,0-.74316-10.457L186.919,119.60547l44.97753-47.90332A28.03445,28.03445,0,0,0,231.79883,32.2002ZM189.207,144.52148a225.51045,225.51045,0,0,1-43.10351,38.13184c-34.46973,23.23145-69.999,32.665-105.83887,28.13477l106.29492-108.915,23.30176,23.30175q.208.22852.43847.44434l.082.07617Z", // svg path of the knife icon (the earlier pypi.svg attribution was a copy-paste; TODO record the real source)
  ],
});
[
{
"name": "v0.3 (latest)",
"version": "0.3.0",
"url": "https://scoring-matrices.readthedocs.io/en/v0.3.0/"
},
{
"name": "v0.2",
"version": "0.2.2",
"url": "https://scoring-matrices.readthedocs.io/en/v0.2.2/"
},
{
"name": "v0.1",
"version": "0.1.1",
"url": "https://scoring-matrices.readthedocs.io/en/v0.1.1/"
}
]
API Reference
=============
.. currentmodule:: scoring_matrices
.. automodule:: scoring_matrices
Classes
-------
.. toctree::
:hidden:
ScoringMatrix <scoring_matrix>
.. only:: html
.. autosummary::
:nosignatures:
scoring_matrices.ScoringMatrix
``ScoringMatrix``
=================
.. currentmodule:: scoring_matrices
.. autoclass:: ScoringMatrix
:members:
.. c:function:: const float* data_ptr()
Get a pointer to the scoring matrix as a C-contiguous array.
.. c:function:: const float** matrix_ptr()
Get a pointer to the scoring matrix as an array of pointers to matrix rows.
.. c:function:: const char* alphabet_ptr()
Get a pointer to the scoring matrix alphabet as a C-string.
.. c:function:: size_t size()
Get the size of the scoring matrix.
.. automethod:: __init__
.. automethod:: __len__
.. automethod:: __getitem__
.. automethod:: __copy__
.. automethod:: __eq__
.. automethod:: __reduce_ex__
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Imports -----------------------------------------------------------------
import datetime
import os
import re
import semantic_version
import shutil
import sys
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
docssrc_dir = os.path.dirname(os.path.abspath(__file__))
project_dir = os.path.dirname(docssrc_dir)

# When building on ReadTheDocs, we can't provide a local version of the Cython
# extensions, so we have to install the latest public version, and avoid
# patching the PYTHONPATH with the local development folder
if os.getenv("READTHEDOCS", "False") != "True":
    sys.path.insert(0, project_dir)

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

import scoring_matrices

# extract the project metadata from the module itself
project = scoring_matrices.__name__

# `__author__` is matched against "Name <email>"; when it does not follow
# that pattern the whole string is used instead of crashing on `None.group`.
_author_match = re.match("(.*) <.*>", scoring_matrices.__author__)
author = _author_match.group(1) if _author_match else scoring_matrices.__author__
del _author_match  # keep the configuration namespace free of helper objects

year = datetime.date.today().year
copyright = "{}, {}".format("2024" if year == 2024 else "2024-{}".format(year), author)

# extract the semantic version: `version` is truncated at the patch level,
# `release` keeps the full coerced version string
semver = semantic_version.Version.coerce(scoring_matrices.__version__)
version = str(semver.truncate(level="patch"))
release = str(semver)

# patch the docstring so that we don't show the link to redirect
# to the docs (we don't want to see it when reading the docs already, duh!)
# A missing module docstring (`None`) is treated as empty.
doc_lines = (scoring_matrices.__doc__ or "").splitlines()
if "See Also:" in doc_lines:
    see_also = doc_lines.index("See Also:")
    scoring_matrices.__doc__ = "\n".join(doc_lines[:see_also])
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
"sphinx.ext.napoleon",
"sphinx.ext.coverage",
"sphinx.ext.mathjax",
"sphinx.ext.todo",
"sphinx.ext.extlinks",
"sphinx_design",
"sphinxcontrib.jquery",
"recommonmark",
"nbsphinx",
"IPython.sphinxext.ipython_console_highlighting",
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = [
"_build",
"Thumbs.db",
".DS_Store",
"**.ipynb_checkpoints",
"requirements.txt",
]
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "monokailight"
# The name of the default role for inline references
default_role = "py:obj"
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "pydata_sphinx_theme"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static/js', '_static/bibtex', '_static/css', '_static/json']
html_js_files = ["custom-icon.js"]
html_css_files = ["custom.css"]
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# Options passed to the ``pydata_sphinx_theme`` HTML theme.
html_theme_options = {
    "external_links": [],
    "show_toc_level": 2,
    "use_edit_page_button": True,
    # Icons shown in the navigation bar, linking to the project pages.
    "icon_links": [
        {
            "name": "GitHub",
            "url": "https://github.com/althonos/scoring-matrices",
            "icon": "fa-brands fa-github",
        },
        {
            "name": "PyPI",
            "url": "https://pypi.org/project/scoring-matrices",
            "icon": "fa-custom fa-pypi",
        },
    ],
    "logo": {
        # Text displayed next to the logo: must be the name of *this*
        # project (it previously read "PyJess", a sibling project).
        "text": "scoring-matrices",
        # "image_light": "_images/logo.png",
        # "image_dark": "_images/logo.png",
    },
    "navbar_start": ["navbar-logo", "version-switcher"],
    "navbar_align": "left",
    "footer_start": ["copyright"],
    "footer_center": ["sphinx-version"],
    # Version switcher: `version` is the truncated semantic version
    # computed earlier in this configuration file.
    "switcher": {
        "json_url": "https://scoring-matrices.readthedocs.io/en/latest/_static/switcher.json",
        "version_match": version,
    },
}
html_context = {
"github_user": "althonos",
"github_repo": "scoring-matrices",
"github_version": "main",
"doc_path": "docs",
}
html_favicon = '_images/favicon.ico'
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = project
# -- Extension configuration -------------------------------------------------
# -- Options for imgmath extension -------------------------------------------
imgmath_image_format = "svg"
# -- Options for napoleon extension ------------------------------------------
napoleon_include_init_with_doc = True
napoleon_include_special_with_doc = True
napoleon_include_private_with_doc = True
napoleon_use_admonition_for_examples = True
napoleon_use_admonition_for_notes = True
napoleon_use_admonition_for_references = True
napoleon_use_rtype = False
# -- Options for autodoc extension -------------------------------------------
autoclass_content = "class"
autodoc_member_order = "bysource"
autosummary_generate = []
# -- Options for intersphinx extension ---------------------------------------
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
}
# -- Options for recommonmark extension --------------------------------------
source_suffix = {
".rst": "restructuredtext",
".txt": "markdown",
".md": "markdown",
}
# -- Options for nbsphinx extension ------------------------------------------
nbsphinx_execute = "auto"
nbsphinx_execute_arguments = [
"--InlineBackend.figure_formats={'svg', 'pdf'}",
"--InlineBackend.rc={'figure.dpi': 96}",
]
# -- Options for extlinks extension ------------------------------------------
extlinks = {
"doi": ("https://doi.org/%s", "doi:%s"),
"pmid": ("https://pubmed.ncbi.nlm.nih.gov/%s", "PMID:%s"),
"pmc": ("https://www.ncbi.nlm.nih.gov/pmc/articles/PMC%s", "PMC%s"),
"isbn": ("https://www.worldcat.org/isbn/%s", "ISBN:%s"),
"wiki": ("https://en.wikipedia.org/wiki/%s", "Wikipedia:%s"),
}
Copyright Notice
================
This library is provided under the MIT license:
.. literalinclude:: ../../COPYING
User Guide
==========
This section contains guides and documents about ``scoring-matrices`` usage.
.. toctree::
:maxdepth: 1
:caption: Getting Started
Installation <install>
.. toctree::
:maxdepth: 1
:caption: Resources
Contribution Guide <contributing>
Changelog <changes>
Copyright Notice <copyright>
Installation
============
.. note::
Wheels are provided for x86-64 Linux, MacOS, and Windows, as well
as Aarch64 Linux and MacOS, but other machines will have to build the
wheel from the source distribution. Building ``scoring-matrices`` involves
compiling some Cython code, which requires a C compiler to be available on
the local machine.
PyPi
^^^^
``scoring-matrices`` is hosted on GitHub, but the easiest way to install it is to download
the latest release from its `PyPi repository <https://pypi.python.org/pypi/scoring-matrices>`_.
``pip`` will install all build dependencies, then install ``scoring-matrices``
either from a wheel if one is available, or from source after compiling the
Cython code:
.. code:: console
$ pip install --user scoring-matrices
Conda
^^^^^
``scoring-matrices`` is also available as a `recipe <https://anaconda.org/bioconda/scoring-matrices>`_
in the `bioconda <https://bioconda.github.io/>`_ channel. To install, simply
use the ``conda`` installer:
.. code:: console
$ conda install bioconda::scoring-matrices
Arch User Repository
^^^^^^^^^^^^^^^^^^^^
A package recipe for Arch Linux can be found in the Arch User Repository
under the name `python-scoring-matrices <https://aur.archlinux.org/packages/python-scoring-matrices>`_.
It will always match the latest release from PyPI.
Steps to install on ArchLinux depend on your `AUR helper <https://wiki.archlinux.org/title/AUR_helpers>`_
(``yaourt``, ``aura``, ``yay``, etc.). For ``aura``, you'll need to run:
.. code:: console
$ aura -A python-scoring-matrices
Piwheels
^^^^^^^^
``scoring-matrices`` is compatible with Raspberry Pi computers, and pre-built
wheels are compiled for ``armv7l`` platforms on `piwheels <https://www.piwheels.org>`_.
Run the following command to install these instead of compiling from source:
.. code:: console
$ pip3 install scoring-matrices --extra-index-url https://www.piwheels.org/simple
Check the `piwheels documentation <https://www.piwheels.org/faq.html>`_ for
more information.
GitHub + ``pip``
^^^^^^^^^^^^^^^^
If, for any reason, you prefer to download the library from GitHub, you can clone
the repository and install it by running the following commands (admin rights may be required):
.. code:: console
$ git clone --recursive https://github.com/althonos/scoring-matrices
$ pip install --user ./scoring-matrices
.. caution::
Keep in mind this will always try to install the latest commit,
which may not even build, so consider using a versioned release instead.
``scoring-matrices`` |Stars|
============================
.. |Stars| image:: https://img.shields.io/github/stars/althonos/scoring-matrices.svg?style=social&maxAge=3600&label=Star
:target: https://github.com/althonos/scoring-matrices/stargazers
*Dependency free, Cython-compatible scoring matrices to use with biological sequences.*
|Actions| |Coverage| |PyPI| |Bioconda| |AUR| |Wheel| |Versions| |Implementations| |License| |Source| |Issues| |Docs| |Changelog| |Downloads|
.. |Actions| image:: https://img.shields.io/github/actions/workflow/status/althonos/scoring-matrices/test.yml?branch=main&logo=github&style=flat-square&maxAge=300
:target: https://github.com/althonos/scoring-matrices/actions
.. |Coverage| image:: https://img.shields.io/codecov/c/gh/althonos/scoring-matrices?style=flat-square&maxAge=600
:target: https://codecov.io/gh/althonos/scoring-matrices/
.. |PyPI| image:: https://img.shields.io/pypi/v/scoring-matrices.svg?style=flat-square&maxAge=3600
:target: https://pypi.python.org/pypi/scoring-matrices
.. |Bioconda| image:: https://img.shields.io/conda/vn/bioconda/scoring-matrices?style=flat-square&maxAge=3600
:target: https://anaconda.org/bioconda/scoring-matrices
.. |AUR| image:: https://img.shields.io/aur/version/python-scoring-matrices?logo=archlinux&style=flat-square&maxAge=3600
:target: https://aur.archlinux.org/packages/python-scoring-matrices
.. |Wheel| image:: https://img.shields.io/pypi/wheel/scoring-matrices?style=flat-square&maxAge=3600
:target: https://pypi.org/project/scoring-matrices/#files
.. |Versions| image:: https://img.shields.io/pypi/pyversions/scoring-matrices.svg?style=flat-square&maxAge=3600
:target: https://pypi.org/project/scoring-matrices/#files
.. |Implementations| image:: https://img.shields.io/pypi/implementation/scoring-matrices.svg?style=flat-square&maxAge=3600&label=impl
:target: https://pypi.org/project/scoring-matrices/#files
.. |License| image:: https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square&maxAge=3600
:target: https://choosealicense.com/licenses/mit/
.. |Source| image:: https://img.shields.io/badge/source-GitHub-303030.svg?maxAge=2678400&style=flat-square
:target: https://github.com/althonos/scoring-matrices/
.. |Issues| image:: https://img.shields.io/github/issues/althonos/scoring-matrices.svg?style=flat-square&maxAge=600
:target: https://github.com/althonos/scoring-matrices/issues
.. |Docs| image:: https://img.shields.io/readthedocs/scoring-matrices?style=flat-square&maxAge=3600
:target: http://scoring-matrices.readthedocs.io/en/stable/?badge=stable
.. |Changelog| image:: https://img.shields.io/badge/keep%20a-changelog-8A0707.svg?maxAge=2678400&style=flat-square
:target: https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md
.. |Downloads| image:: https://img.shields.io/pypi/dm/scoring-matrices?style=flat-square&color=303f9f&maxAge=86400&label=downloads
:target: https://pepy.tech/project/scoring-matrices
.. currentmodule:: scoring_matrices
Overview
--------
*Scoring Matrices* are matrices used to score the matches and mismatches between
two characters at the same position in a sequence alignment. Some of these
matrices are derived from *substitution matrices*, which use evolutionary
modeling.
The ``scoring-matrices`` package is a dependency-free, batteries-included
Cython library to handle and distribute common substitution matrices:
.. grid:: 1 2 3 3
:gutter: 1
.. grid-item-card:: :fas:`box` Self-contained
The matrices are distributed as-is: you don't need the whole
`Biopython <https://biopython.org>`_ ecosystem, or even
`NumPy <https://numpy.org/>`_.
.. grid-item-card:: :fas:`gears` Cython Support
The `ScoringMatrix` class is a Cython class that can be
inherited, and the matrix data can be accessed as either a raw pointer, or
a `typed memoryview <https://cython.readthedocs.io/en/latest/src/userguide/memoryviews.html>`_.
.. grid-item-card:: :fas:`battery-full` Batteries-included
The package distributes most common matrices, such as those used by
the NCBI BLAST+ suite, including: *PAM*, *BLOSUM*, *VTML*, *BENNER*,
etc.
.. grid-item-card:: :fas:`file-import` I/O Support
Easily load a `ScoringMatrix` from a file, a file-like object, or by
name for common matrices.
.. grid-item-card:: :fas:`screwdriver-wrench` Configurable
Easily build your own scoring matrices using the Python interface,
including various shortcut constructors to create a `ScoringMatrix`
from a diagonal or a pair of match/mismatch scores.
.. grid-item-card:: :fas:`stamp` Versioned
This library follows semantic versioning to guarantee compatibility
between patch versions, allowing for a safe API and ABI that can be
reused without recompiling on each install.
Setup
-----
``scoring-matrices`` is available for all modern Python versions (3.7+).
Run ``pip install scoring-matrices`` in a shell to download the latest release
from PyPi, or have a look at the :doc:`Installation page <guide/install>` to find
other ways to install the package.
Library
-------
.. toctree::
:maxdepth: 2
User Guide <guide/index>
API Reference <api/index>
Related Projects
----------------
The following Python libraries may be of interest for bioinformaticians.
.. grid:: 1 3 5 5
:gutter: 1
.. grid-item-card:: :fas:`diamond` PyHMMER
:link: https://pyhmmer.readthedocs.io
Profile Hidden Markov Models (with HMMER).
.. grid-item-card:: :fas:`fire` Pyrodigal
:link: https://pyrodigal.readthedocs.io
Prokaryotic Gene Finding (with Prodigal).
.. grid-item-card:: :fas:`virus-covid` Pyrodigal-gv
:link: https://github.com/althonos/pyrodigal-gv
Pyrodigal for Giant Viruses.
.. grid-item-card:: :fas:`align-center` PyFAMSA
:link: https://pyfamsa.readthedocs.io
Multiple Sequence Alignment (with FAMSA).
.. grid-item-card:: :fas:`scissors` PytrimAl
:link: https://pytrimal.readthedocs.io
Alignment Trimming (with trimAl).
.. grid-item-card:: :fas:`music` LightMotif
:link: https://lightmotif.readthedocs.io
Platform-accelerated motif scoring.
.. grid-item-card:: :fas:`knife;fa-custom` Diced
:link: https://diced.readthedocs.io
CRISPR Detection (with MinCED).
.. grid-item-card:: :fas:`table-cells` Scoring Matrices
:link: https://scoring-matrices.readthedocs.io
Scoring matrices for Cython.
.. grid-item-card:: :fas:`chain` Pyskani
:link: https://pyskani.readthedocs.io
Average Nucleotide Identity (with skani).
.. grid-item-card:: :fas:`forward-fast` PyFastANI
:link: https://pyfastani.readthedocs.io
Average Nucleotide Identity (with FastANI).
.. grid-item-card:: :fas:`magnifying-glass` PyJess
:link: https://pyjess.readthedocs.io
Geometric Template Matching (with Jess).
.. grid-item-card:: :fas:`repeat` PyTantan
:link: https://pytantan.readthedocs.io
Tandem Repeat Masking (with Tantan).
.. grid-item-card:: :fas:`gem` PyOpal
:link: https://pyopal.readthedocs.io
Query/Database Aligner (with Opal).
.. grid-item-card:: :fas:`sword;fa-custom` PySWRD
:link: https://pyswrd.readthedocs.io
Database Heuristic Filtering (with SWORD).
.. grid-item-card:: :fas:`rocket` Mini3di
:link: https://github.com/althonos/mini3di
Protein structure to 3di in pure Python.
.. grid-item-card:: :fas:`calculator` ``peptides.py``
:link: https://peptides.readthedocs.io
Peptide descriptors for Python.
.. grid-item-card:: :fas:`diagram-project` Pronto
:link: https://pronto.readthedocs.io
Open Biomedical Ontologies for Python.
.. grid-item-card:: :fas:`box` NAFcodec
:link: https://nafcodec.readthedocs.io
Nucleotide Archival Format for Python.
.. grid-item-card:: :fas:`bank` ``gb-io.py``
:link: https://gb-io.readthedocs.io
Fast GenBank parser for Python (with ``gb-io``).
License
-------
This library is provided under the `MIT License <https://choosealicense.com/licenses/mit/>`_.
Matrices were collected from the `MMseqs2 <https://github.com/soedinglab/MMseqs2>`_,
`Biopython <https://github.com/biopython/biopython/tree/master/Bio/Align/substitution_matrices/data>`_
and `NCBI BLAST+ <https://ftp.ncbi.nih.gov/blast/matrices/>`_ sources and are believed to
be in the public domain.
*This project was developed by* `Martin Larralde <https://github.com/althonos/>`_
*during his PhD project at the* `Leiden University Medical Center <https://www.lumc.nl/en/>`_
*in the* `Zeller team <https://github.com/zellerlab>`_.
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# build dependencies
setuptools >=46.4
cython ~=3.0
# sphinx documentation dependencies
semantic_version ~=2.8
sphinx >=5.0
recommonmark ~=0.7
pygments-style-monokailight ~=0.4
ipython ~=7.19
pygments ~=2.4
nbsphinx ~=0.8
sphinxcontrib-jquery ~=4.1
sphinx-design
pydata-sphinx-theme
# Maintainer: Martin Larralde <martin.larralde@embl.de>
_name=scoring-matrices
_module=${_name/-/_}
pkgname=python-${_name}
pkgver=%pkgver
pkgrel=1
pkgdesc="Dependency free, Cython-compatible scoring matrices to use with biological sequences."
arch=('i686' 'pentium4' 'x86_64' 'arm' 'armv6h' 'armv7h' 'aarch64')
url="https://github.com/althonos/scoring-matrices"
license=("MIT")
depends=('python')
makedepends=('cython' 'python-build' 'python-installer' 'cmake' 'ninja' 'python-scikit-build-core')
source=("https://files.pythonhosted.org/packages/source/${_name::1}/$_name/$_name-$pkgver.tar.gz")
sha256sums=(%sha256sum)
build() {
cd "${srcdir}/${_name}-${pkgver}"
python -m build --wheel --no-isolation --skip-dependency-check
}
check() {
local abitag=$(python -c 'import sys; print(*sys.version_info[:2], sep="")')
local machine=$(python -c 'import platform; print(platform.machine())')
whl="${srcdir}/${_name}-${pkgver}/dist/${_module}-${pkgver}-cp${abitag}-cp${abitag}-linux_${machine}.whl"
python -m venv --symlinks --system-site-packages "${srcdir}/env"
source "${srcdir}/env/bin/activate"
python -m installer "$whl"
python -m unittest ${_module}.tests
}
# Install the wheel produced by build() into the package directory, along
# with the license file.
package() {
    # Reconstruct the wheel file name from the running interpreter: the ABI
    # tag is the concatenated major/minor version (e.g. "311") and the
    # platform tag uses the machine name reported by Python.
    local abitag=$(python -c 'import sys; print(*sys.version_info[:2], sep="")')
    local machine=$(python -c 'import platform; print(platform.machine())')
    local whl="${srcdir}/${_name}-${pkgver}/dist/${_module}-${pkgver}-cp${abitag}-cp${abitag}-linux_${machine}.whl"

    python -m installer --prefix="${pkgdir}/usr" "$whl"
    # Quote the source path so that a build directory containing spaces
    # does not get split into several arguments.
    install -Dm644 "${srcdir}/${_name}-${pkgver}/COPYING" "$pkgdir/usr/share/licenses/$pkgname/COPYING"
}
add_subdirectory(scoring_matrices)
from .lib cimport ScoringMatrix
from ._version import __version__
__author__ = "Martin Larralde <martin.larralde@embl.de>"
__license__ = "MIT"
from . import lib
from .lib import ScoringMatrix
__doc__ = lib.__doc__
__version__ = "0.3.0rc1"
lib.c
lib.html
matrices.h
cython_extension(lib LINKS matrices)
# distutils: language = c
# cython: language_level=3, linetrace=True, binding=True
# C-level declaration of the `ScoringMatrix` extension type, to be
# ``cimport``-ed by other Cython modules.
cdef class ScoringMatrix:
    # Python-visible, read-only attributes.
    cdef readonly str   name
    cdef readonly str   alphabet

    # Number of letters in the alphabet (matrix is `_size` x `_size`).
    cdef size_t         _size
    # Total number of scores, i.e. `_size * _size`.
    cdef size_t         _nitems
    # Shape exposed through the buffer protocol.
    cdef Py_ssize_t[2]  _shape
    # Dense storage of the scores.
    cdef float*         _data
    # Array of row pointers, each pointing into `_data`.
    cdef float**        _matrix
    # Zero-terminated copy of the alphabet letters.
    cdef char*          _alphabet

    # (Re)allocate the internal buffers for an alphabet of the given length.
    cdef int _allocate(self, size_t length) except 1 nogil

    # Raw accessors; each raises if the corresponding buffer is NULL.
    cdef const float* data_ptr(self) except NULL nogil
    cdef const float** matrix_ptr(self) except NULL nogil
    cdef const char* alphabet_ptr(self) except NULL nogil
    cdef size_t size(self) noexcept nogil

    # Methods callable from both Python and Cython.
    cpdef bint is_integer(self)
    cpdef bint is_symmetric(self)
    cpdef float min(self)
    cpdef float max(self)
    cpdef ScoringMatrix copy(self)
    cpdef ScoringMatrix shuffle(self, str alphabet)
import typing
from typing import ClassVar, List, Optional, Sequence, TextIO, Tuple, Type, TypeVar
S = TypeVar("S")
class ScoringMatrix:
    # Type stub for the `ScoringMatrix` extension type implemented in Cython.

    # Alphabet used when none is given to a constructor.
    DEFAULT_ALPHABET: ClassVar[str]
    # Names accepted by `ScoringMatrix.from_name`.
    BUILTIN_MATRICES: ClassVar[typing.FrozenSet[str]]

    # --- Constructors -------------------------------------------------------
    @classmethod
    def from_name(cls: Type[S], name: str = "BLOSUM62") -> S: ...
    @classmethod
    def from_file(cls: Type[S], file: TextIO, name: Optional[str] = None) -> S: ...
    @classmethod
    def from_str(cls: Type[S], text: str, name: Optional[str] = None) -> S: ...
    @classmethod
    def from_diagonal(
        cls: Type[S],
        # `Iterable` is not part of the names imported from `typing` in this
        # stub, so it is referenced through the `typing` module instead.
        diagonal: typing.Iterable[float],
        mismatch_score: float = 0.0,
        alphabet: str = DEFAULT_ALPHABET,
        name: Optional[str] = None,
    ) -> S: ...
    @classmethod
    def from_match_mismatch(
        cls: Type[S],
        match_score: float = 1.0,
        # matches the default of the Cython implementation
        mismatch_score: float = -1.0,
        alphabet: str = DEFAULT_ALPHABET,
        name: Optional[str] = None,
    ) -> S: ...

    # --- Magic methods ------------------------------------------------------
    def __init__(
        self,
        matrix: Sequence[Sequence[float]],
        alphabet: str = DEFAULT_ALPHABET,
        name: Optional[str] = None,
    ): ...
    def __copy__(self: S) -> S: ...
    def __repr__(self) -> str: ...
    def __reduce_ex__(self, protocol: object) -> Tuple[object, ...]: ...
    def __len__(self) -> int: ...
    def __eq__(self, other: object) -> bool: ...
    @property
    def alphabet(self) -> str: ...
    @property
    def name(self) -> Optional[str]: ...
    # A single index (position or letter) yields a row; a pair yields a score.
    @typing.overload
    def __getitem__(self, item: int) -> List[float]: ...
    @typing.overload
    def __getitem__(self, item: str) -> List[float]: ...
    @typing.overload
    def __getitem__(self, item: Tuple[int, int]) -> float: ...
    @typing.overload
    def __getitem__(self, item: Tuple[str, str]) -> float: ...

    # --- Public methods -----------------------------------------------------
    def copy(self: S) -> S: ...
    def is_integer(self) -> bool: ...
    def is_symmetric(self) -> bool: ...
    def min(self) -> float: ...
    def max(self) -> float: ...
    def shuffle(self: S, alphabet: str) -> S: ...
# distutils: language = c
# cython: language_level=3, linetrace=True, binding=True
"""Dependency free, Cython-compatible scoring matrices for bioinformatics.
"""
cimport cython
from cpython.buffer cimport PyBUF_FORMAT, PyBUF_READ, PyBUF_WRITE
from cpython.memoryview cimport PyMemoryView_FromMemory
from libc.math cimport INFINITY, lrintf
from libc.stdlib cimport free, realloc
from libc.string cimport memcpy, memset
from .matrices cimport _ALPHABETS, _MATRICES, _NAMES, _SIZES
import io
import pickle
# Map the name of each embedded matrix to its index in the `_NAMES`,
# `_ALPHABETS`, `_SIZES` and `_MATRICES` arrays declared in `matrices.h`.
# NOTE(review): the `- 1` skips the last entry of `_NAMES`, presumably a
# NULL sentinel terminating the array -- confirm against `matrices.h`.
cdef dict _INDICES = {
    _NAMES[i].decode('ascii'):i
    for i in range(sizeof(_NAMES) /sizeof(const char*) - 1)
}
cdef class ScoringMatrix:
"""A scoring matrix to use for biological sequence alignments.
"""
DEFAULT_ALPHABET = "ARNDCQEGHILKMFPSTWYVBZX*"
BUILTIN_MATRICES = frozenset(_INDICES)
# --- Constructors ---------------------------------------------------------
@classmethod
def from_name(cls, str name not None = "BLOSUM62"):
    """Load a built-in scoring matrix by name.

    Arguments:
        name (`str`): The name of the scoring matrix.

    Example:
        >>> blosum62 = ScoringMatrix.from_name("BLOSUM62")

    Raises:
        `ValueError`: When no scoring matrix with the given ``name``
            can be found in the embedded matrix data.

    """
    cdef size_t index
    cdef size_t offset
    cdef size_t column
    cdef list   matrix

    # find where the requested matrix is stored in the embedded data
    try:
        index = _INDICES[name]
    except KeyError as err:
        raise ValueError(f"Unknown scoring matrix: {name!r}") from err

    alphabet = _ALPHABETS[index].decode('ascii')

    # the embedded scores are a flat array: slice it row by row
    matrix = []
    for offset in range(0, _SIZES[index] * _SIZES[index], _SIZES[index]):
        scores = []
        for column in range(_SIZES[index]):
            scores.append(_MATRICES[index][offset + column])
        matrix.append(scores)

    return cls(matrix, alphabet=alphabet, name=name)
@classmethod
def from_file(cls, object file, str name = None):
    """Load a scoring matrix from a file-like object.

    Lines starting with ``#`` and blank lines are ignored. The first
    remaining line is the header listing the alphabet letters; every
    following line is a matrix row, optionally prefixed with the letter
    of that row.

    Arguments:
        file (file-like object): An iterable of text lines, such as a
            file opened in text mode.
        name (`str` or `None`): A name for the scoring matrix, if any.

    Raises:
        `ValueError`: When the header is missing, when a row label does
            not match the alphabet, or when a score is not a number.

    """
    # ignore blank lines and lines with comments
    lines = filter(
        lambda line: line.strip() and not line.startswith("#"),
        file
    )
    # get the header line with the alphabet
    header = next(lines, None)
    if header is None:
        raise ValueError("Missing expected header line")
    alphabet = ''.join(header.split())
    # get the numerical matrix
    matrix = []
    for i, line in enumerate(lines):
        row = line.split()
        try:
            float(row[0])
        except ValueError:
            # the first field is not a number, so it must be the row label
            if i >= len(alphabet):
                raise ValueError(f"Unexpected row {row[0]!r}: matrix has more rows than alphabet letters") from None
            if row[0] != alphabet[i]:
                raise ValueError(f"Matrix must be symmetric (expected row {alphabet[i]!r}, got {row[0]!r})") from None
            row = row[1:]
        matrix.append(list(map(float, row)))
    # create the object with default constructor
    return cls(matrix, alphabet=alphabet, name=name)
@classmethod
def from_str(cls, str text, str name = None):
    """Load a scoring matrix from a string.

    Arguments:
        text (`str`): The matrix, in the text format supported by
            `ScoringMatrix.from_file`.
        name (`str` or `None`): A name for the scoring matrix, if any.

    """
    # forward the name, which would otherwise be silently discarded
    return cls.from_file(io.StringIO(text), name=name)
@classmethod
def from_diagonal(
    cls,
    object diagonal,
    float mismatch_score=0.0,
    str alphabet not None = DEFAULT_ALPHABET,
    str name = None
):
    """Create a scoring matrix from a diagonal vector.

    Arguments:
        diagonal (sequence of `float`): The scores to put on the
            diagonal of the matrix, i.e. the score of each character
            when matched with itself.
        mismatch_score (`float`): The score to use for every pair of
            different characters.
        alphabet (`str`): The alphabet to use with the scoring matrix.
        name (`str` or `None`): A name for the scoring matrix, if any.

    Example:
        >>> matrix = ScoringMatrix.from_diagonal(
        ...     diagonal=[2, 2, 3, 3],
        ...     mismatch_score=-3.0,
        ...     alphabet="ATGC",
        ... )
        >>> for row in matrix:
        ...     print(row)
        [2.0, -3.0, -3.0, -3.0]
        [-3.0, 2.0, -3.0, -3.0]
        [-3.0, -3.0, 3.0, -3.0]
        [-3.0, -3.0, -3.0, 3.0]

    .. versionadded:: 0.2.0

    """
    cdef size_t width = len(alphabet)
    cdef list   rows  = []

    for position, score in enumerate(diagonal):
        # start from a row of mismatches, then patch the diagonal cell
        scores = [mismatch_score] * width
        if position < width:
            scores[position] = score
        rows.append(scores)

    return cls(rows, alphabet=alphabet, name=name)
@classmethod
def from_match_mismatch(
    cls,
    float match_score = 1.0,
    float mismatch_score = -1.0,
    str alphabet not None = DEFAULT_ALPHABET,
    str name = None,
):
    """Create a scoring matrix from two match/mismatch scores.

    Arguments:
        match_score (`float`): The score of two identical characters.
        mismatch_score (`float`): The score of two different characters.
        alphabet (`str`): The alphabet to use with the scoring matrix.
        name (`str` or `None`): A name for the scoring matrix, if any.

    .. versionadded:: 0.2.0

    """
    cdef size_t width = len(alphabet)
    cdef list   rows  = []

    for i in range(width):
        # a row of mismatches with a single match on the diagonal
        scores = [mismatch_score] * width
        scores[i] = match_score
        rows.append(scores)

    return cls(rows, alphabet=alphabet, name=name)
# --- Magic methods --------------------------------------------------------
def __cinit__(self):
    # Start from an empty state: every buffer is NULL so that the
    # `realloc` calls in `_allocate` behave like `malloc`, and so that
    # `__dealloc__` is safe even when `__init__` never ran.
    self._data = NULL
    self._matrix = NULL
    self._alphabet = NULL
    self._size = 0
    self._nitems = 0
    self._shape[0] = self._shape[1] = 0

def __dealloc__(self):
    # Release the buffers obtained in `_allocate`; without this every
    # matrix leaks its storage. `free(NULL)` is a no-op.
    free(self._data)
    free(self._matrix)
    free(self._alphabet)
def __init__(
    self,
    object matrix not None,
    str alphabet not None = DEFAULT_ALPHABET,
    str name = None,
):
    """__init__(self, matrix, alphabet="ARNDCQEGHILKMFPSTWYVBZX*", name=None)\n--\n

    Create a new scoring matrix.

    Arguments:
        matrix (array-like of `float`): A square matrix with dimensions
            equal to the ``alphabet`` length, storing the scores for each
            pair of characters.
        alphabet (`str`): The alphabet used to index the rows and columns
            of the scoring matrix.
        name (`str` or `None`): The name of the scoring matrix, if any.

    Example:
        >>> matrix = ScoringMatrix(
        ...     [[91, -114, -31, -123],
        ...      [-114, 100, -125, -31],
        ...      [-31, -125, 100, -114],
        ...      [-123, -31, -114, 91]],
        ...     alphabet="ACGT",
        ...     name="HOXD70",
        ... )

    Raises:
        `ValueError`: When the matrix is not a valid matrix or does not
            match the given alphabet.
        `MemoryError`: When memory for storing the scores could not be
            allocated successfully.

    """
    cdef ssize_t i
    cdef ssize_t j
    cdef float   x
    cdef size_t  size = len(alphabet)

    if len(alphabet) != len(set(alphabet)):
        # report the argument: `self.alphabet` has not been assigned yet
        raise ValueError(f"Duplicate letters found in alphabet: {alphabet!r}")
    if len(matrix) != size:
        raise ValueError("Matrix must contain one row per alphabet letter")

    self.alphabet = alphabet
    self.name = name

    # allocate the buffers for the scores and the C copy of the alphabet
    with nogil:
        self._allocate(size)

    # copy the alphabet letters into the C string
    for i, c in enumerate(self.alphabet):
        self._alphabet[i] = ord(c)

    # copy the scores, checking that every row has the right length
    for i, row in enumerate(matrix):
        if len(row) != size:
            raise ValueError("Matrix must contain one column per alphabet letter")
        for j, x in enumerate(row):
            self._matrix[i][j] = x
def __copy__(self):
    # Support for `copy.copy`: delegates to the `copy` method.
    return self.copy()
def __repr__(self):
    # Show the rows first, then only the arguments that differ from the
    # constructor defaults.
    cdef list parts = [repr(list(self))]
    if self.alphabet != ScoringMatrix.DEFAULT_ALPHABET:
        parts.append(f"alphabet={self.alphabet!r}")
    if self.name is not None:
        parts.append(f"name={self.name!r}")
    return "{}({})".format(type(self).__name__, ", ".join(parts))
def __reduce_ex__(self, int protocol):
    # Pickle the matrix as a call to the constructor.
    assert self._data != NULL
    assert self._matrix != NULL

    # Always serialize the scores as a list of rows, whatever the
    # protocol: the constructor indexes its `matrix` argument row by row
    # and cannot rebuild a matrix from the flat `bytes` (in-band) or
    # buffer object (out-of-band) that a `pickle.PickleBuffer` is
    # restored as, so using one for protocol 5 made unpickling fail.
    matrix = list(self)
    return (type(self), (matrix, self.alphabet, self.name))
def __getbuffer__(self, Py_buffer* buffer, int flags):
    # Expose the scores as a read-only, 2-dimensional buffer of C floats.
    assert self._data != NULL

    # only report the item format when the consumer asked for it
    if flags & PyBUF_FORMAT:
        buffer.format = b"f"
    else:
        buffer.format = NULL
    buffer.buf = self._data
    buffer.internal = NULL
    buffer.itemsize = sizeof(float)
    buffer.len = self._nitems * sizeof(float)
    buffer.ndim = 2
    buffer.obj = self
    buffer.readonly = 1
    buffer.shape = <Py_ssize_t*> &self._shape
    buffer.suboffsets = NULL
    # NOTE(review): strides are left NULL, which the buffer protocol
    # interprets as a C-contiguous array -- confirm this is acceptable
    # for consumers that explicitly request strides.
    buffer.strides = NULL
def __len__(self):
    # The length is the number of rows, i.e. the size of the alphabet.
    return self._size
def __getitem__(self, object item):
    # Supported forms:
    #   matrix[i] or matrix["A"]            -> row, as a list of floats
    #   matrix[i, j] or matrix["A", "R"]    -> a single score
    # Integers may be negative to index from the end. Raises `IndexError`
    # for unknown letters or out-of-range indices, and `TypeError` for
    # unsupported index types.
    cdef ssize_t index_
    cdef ssize_t row_
    cdef ssize_t col_
    cdef list    row

    # translate a letter into its position in the alphabet
    if isinstance(item, str) and len(item) == 1:
        try:
            item = self.alphabet.index(item)
        except ValueError:
            raise IndexError(f"{item!r} not in matrix alphabet ({self.alphabet!r})") from None

    if isinstance(item, int):
        index_ = item
        if index_ < 0:
            index_ += self._size
        if index_ < 0 or index_ >= self._size:
            raise IndexError(item)
        row = []
        for i in range(self._size):
            row.append(self._matrix[index_][i])
        return row
    elif isinstance(item, tuple):
        if len(item) > 2:
            raise IndexError(f"too many indices for array: array is 2-dimensional, but {len(item)!r} were indexed")
        i, j = item
        if isinstance(i, str) and len(i) == 1:
            try:
                i = self.alphabet.index(i)
            except ValueError:
                raise IndexError(f"{i!r} not in matrix alphabet ({self.alphabet!r})") from None
        if isinstance(j, str) and len(j) == 1:
            try:
                j = self.alphabet.index(j)
            except ValueError:
                raise IndexError(f"{j!r} not in matrix alphabet ({self.alphabet!r})") from None
        if isinstance(i, int) and isinstance(j, int):
            # validate both indices before touching the raw pointers:
            # an unchecked access would read arbitrary memory
            row_ = i
            col_ = j
            if row_ < 0:
                row_ += self._size
            if col_ < 0:
                col_ += self._size
            if row_ < 0 or row_ >= self._size or col_ < 0 or col_ >= self._size:
                raise IndexError(item)
            return self._matrix[row_][col_]

    raise TypeError(item)
def __eq__(self, object other):
    # Two matrices are equal when they share the same alphabet and the
    # same scores; the name is not compared.
    assert self._data != NULL
    assert self._matrix != NULL

    cdef size_t        k
    cdef ScoringMatrix rhs

    if not isinstance(other, ScoringMatrix):
        return NotImplemented
    rhs = other

    if rhs.alphabet != self.alphabet:
        return False
    # same alphabet means same dimensions: compare the scores one by one
    for k in range(self._nitems):
        if self._data[k] != rhs._data[k]:
            return False
    return True
# --- Private methods ------------------------------------------------------
cdef int _allocate(self, size_t size) except 1 nogil:
    # (Re)allocate the internal buffers for a `size` x `size` matrix.
    # Returns 0 on success; raises `MemoryError` if any allocation fails.
    cdef size_t i

    # resize the score storage, the row pointers and the alphabet string
    # (with one extra byte for the terminating zero)
    self._data = <float*> realloc(self._data, sizeof(float) * size * size)
    self._matrix = <float**> realloc(self._matrix, sizeof(float*) * size)
    self._alphabet = <char*> realloc(self._alphabet, sizeof(char) * (size + 1))
    if self._data is NULL or self._matrix is NULL or self._alphabet is NULL:
        raise MemoryError("Failed to allocate matrix")

    # record the new dimensions
    self._size = self._shape[0] = self._shape[1] = size
    self._nitems = self._size * self._size

    # make each row pointer refer to the start of its row in `_data`
    for i in range(size):
        self._matrix[i] = &self._data[i * self._size]

    # clear the alphabet so that it is always zero-terminated
    memset(self._alphabet, 0, sizeof(char) * (size + 1))
    return 0
# --- Public methods -------------------------------------------------------
cdef size_t size(self) noexcept nogil:
    """Get the size of the scoring matrix.

    The size is the number of rows (and columns) of the matrix.
    """
    return self._size
cdef const char* alphabet_ptr(self) except NULL nogil:
    """Get the alphabet of the scoring matrix as a C-string.

    Raises `RuntimeError` when the alphabet buffer is NULL.
    """
    if self._alphabet == NULL:
        # raising an exception requires holding the GIL
        with gil:
            raise RuntimeError("uninitialized scoring matrix")
    return <const char*> self._alphabet
cdef const float* data_ptr(self) except NULL nogil:
    """Get the matrix scores as a dense array.

    Raises `RuntimeError` when the score buffer is NULL.
    """
    if self._data == NULL:
        # raising an exception requires holding the GIL
        with gil:
            raise RuntimeError("uninitialized scoring matrix")
    return <const float*> self._data
cdef const float** matrix_ptr(self) except NULL nogil:
    """Get the matrix scores as an array of pointers.

    Raises `RuntimeError` when the row pointer array is NULL.
    """
    if self._matrix == NULL:
        # raising an exception requires holding the GIL
        with gil:
            raise RuntimeError("uninitialized scoring matrix")
    return <const float**> self._matrix
cpdef ScoringMatrix copy(self):
    """Get a copy of the matrix.

    The copy is built by calling the constructor of ``type(self)`` with
    this matrix, its alphabet and its name.
    """
    return type(self)(self, alphabet=self.alphabet, name=self.name)
cpdef bint is_integer(self):
    """Test whether the scoring matrix is an integer matrix.

    Returns:
        `bool`: `True` if every score of the matrix is a whole number.

    Example:
        >>> blosum62 = ScoringMatrix.from_name("BLOSUM62")
        >>> blosum62.is_integer()
        True
        >>> benner6 = ScoringMatrix.from_name("BENNER6")
        >>> benner6.is_integer()
        False

    """
    cdef size_t       k
    cdef float        score
    cdef const float* scores = self.data_ptr()
    cdef bint         whole  = True

    with nogil:
        for k in range(self._nitems):
            score = scores[k]
            # a score that changes when rounded is not an integer
            if lrintf(score) != score:
                whole = False
                break
    return whole
cpdef bint is_symmetric(self):
    """Test whether the scoring matrix is symmetric.

    Returns:
        `bool`: `True` if the matrix is a symmetric matrix.

    .. versionadded:: 0.2.0

    """
    cdef size_t        i
    cdef size_t        j
    cdef bint          symmetric = True
    cdef const float** _matrix   = self.matrix_ptr()

    with nogil:
        for i in range(self._size):
            # only the upper triangle needs to be compared to the lower one
            for j in range(i + 1, self._size):
                if _matrix[i][j] != _matrix[j][i]:
                    symmetric = False
                    break
            # the inner `break` only leaves the inner loop: stop scanning
            # the remaining rows once a mismatch has been found
            if not symmetric:
                break
    return symmetric
cpdef float min(self):
    """Get the minimum score of the scoring matrix.

    Example:
        >>> blosum62 = ScoringMatrix.from_name("BLOSUM62")
        >>> blosum62.min()
        -4.0

    """
    assert self._data != NULL

    cdef size_t k
    cdef float  lowest = INFINITY

    with nogil:
        # linear scan keeping the smallest score seen so far
        for k in range(self._nitems):
            if self._data[k] < lowest:
                lowest = self._data[k]
    return lowest
cpdef float max(self):
    """Get the maximum score of the scoring matrix.

    Example:
        >>> blosum62 = ScoringMatrix.from_name("BLOSUM62")
        >>> blosum62.max()
        11.0

    """
    assert self._data != NULL

    cdef size_t k
    cdef float  highest = -INFINITY

    with nogil:
        # linear scan keeping the largest score seen so far
        for k in range(self._nitems):
            if self._data[k] > highest:
                highest = self._data[k]
    return highest
cpdef ScoringMatrix shuffle(self, str alphabet):
    """Shuffle the matrix using the new given alphabet.

    The matrix name is retained only when the provided ``alphabet`` is a
    permutation of the current alphabet, i.e. there is no loss of data.

    Arguments:
        alphabet (`str`): The new alphabet to use for the columns. It
            must be a subset of ``self.alphabet``.

    Raises:
        `KeyError`: When some required alphabet letters are missing from
            the source matrix alphabet.

    Example:
        >>> m1 = ScoringMatrix.from_name("BLOSUM62")
        >>> m1[1, 1]
        5.0
        >>> m1['R', 'R']
        5.0
        >>> m2 = m1.shuffle("ABCDEFGHIKLMNPQRSTVWXYZ*")
        >>> m2[1, 1]
        4.0
        >>> m2['R', 'R']
        5.0

    """
    cdef list indices = []
    cdef list matrix  = []

    # locate every letter of the new alphabet in the current one
    for x in alphabet:
        try:
            indices.append(self.alphabet.index(x))
        except ValueError:
            raise KeyError(f"new alphabet contains unknown letter: {x!r}") from None

    # the same indices select both the rows and the columns to keep, so
    # there is no need to look up each letter in the alphabet again
    for k in indices:
        row = self[k]
        matrix.append([row[j] for j in indices])

    name = self.name if len(alphabet) == len(self.alphabet) else None
    return type(self)(matrix, alphabet=alphabet, name=name)
# Embedded matrix data declared in the `matrices.h` header. The arrays are
# indexed in parallel: entry `i` of each array describes the same matrix.
cdef extern from "matrices.h" nogil:
    # name of each matrix
    const char*  _NAMES[]
    # alphabet of each matrix, as a C string
    const char*  _ALPHABETS[]
    # number of letters in the alphabet of each matrix
    const size_t _SIZES[]
    # scores of each matrix, as a flat array of `size * size` floats
    const float* _MATRICES[]
# noqa: D104
from . import test_doctest, test_scoring_matrix
def load_tests(loader, suite, pattern):
    """Collect the package tests for the ``unittest`` ``load_tests`` protocol.

    The doctests are registered first through ``test_doctest.load_tests``,
    then the unit tests of ``test_scoring_matrix`` are added to the same
    suite, which is returned.
    """
    test_doctest.load_tests(loader, suite, pattern)
    unit_tests = loader.loadTestsFromModule(test_scoring_matrix)
    suite.addTests(unit_tests)
    return suite
# coding: utf-8
"""Test doctest contained tests in every file of the module.
"""
import configparser
import doctest
import importlib
import os
import pkgutil
import re
import shutil
import sys
import types
import warnings
from unittest import mock
import scoring_matrices
def _load_tests_from_module(tests, module, globs, setUp=None, tearDown=None):
"""Load tests from module, iterating through submodules."""
for attr in (getattr(module, x) for x in dir(module) if not x.startswith("_")):
if isinstance(attr, types.ModuleType):
suite = doctest.DocTestSuite(
attr,
globs,
setUp=setUp,
tearDown=tearDown,
optionflags=+doctest.ELLIPSIS,
)
tests.addTests(suite)
return tests
def load_tests(loader, tests, ignore):
    """Register the library doctests, following the ``load_tests`` protocol.

    A mocked ``Bio`` namespace is injected in the doctest globals so that
    examples using Biopython substitution matrices can run without
    Biopython being installed.
    """
    _current_cwd = os.getcwd()

    # fake matrix returned by `Bio.Align.substitution_matrices.load`:
    # a 20x20 table with 1 on the diagonal and -1 everywhere else
    data = [[1 if row == col else -1 for col in range(20)] for row in range(20)]
    feng = mock.Mock()
    feng.alphabet = "ARNDCQEGHILKMFPSTWYV"
    feng.__len__ = mock.Mock(return_value=20)
    feng.__iter__ = mock.Mock(wraps=data.__iter__)

    # demonstrate how to use Biopython substitution matrices without
    # actually requiring Biopython
    Bio = mock.Mock()
    Bio.Align = mock.Mock()
    Bio.Align.substitution_matrices = mock.Mock()
    Bio.Align.substitution_matrices.load = mock.Mock(return_value=feng)

    def setUp(self):
        # silence warnings while a doctest is running
        warnings.simplefilter("ignore")
        # os.chdir(os.path.realpath(os.path.join(__file__, os.path.pardir, "data")))

    def tearDown(self):
        # os.chdir(_current_cwd)
        warnings.simplefilter(warnings.defaultaction)

    # doctests are not compatible with `green`, so we may want to bail out
    # early if `green` is running the tests
    if sys.argv[0].endswith("green"):
        return tests

    # load the doctests of every queued library module
    pending = [scoring_matrices.lib]
    while pending:
        pkg = pending.pop()
        globs = dict(scoring_matrices=scoring_matrices, Bio=Bio, **pkg.__dict__)
        suite = doctest.DocTestSuite(
            pkg,
            globs=globs,
            setUp=setUp,
            tearDown=tearDown,
            optionflags=doctest.ELLIPSIS,
        )
        tests.addTests(suite)

    return tests
import pickle
import sys
import unittest
from scoring_matrices import ScoringMatrix
class TestScoringMatrix(unittest.TestCase):
def test_from_name_blosum50(self):
matrix = ScoringMatrix.from_name("BLOSUM50")
diagonal = [ matrix[i, i] for i in range(len(matrix)) ]
self.assertEqual(len(diagonal), 24)
self.assertEqual(diagonal, [5, 7, 7, 8, 13, 7, 6, 8, 10, 5, 5, 6, 7, 8, 10, 5, 5, 15, 8, 5, 5, 5 ,-1 ,1])
def test_from_name_blosum62(self):
matrix = ScoringMatrix.from_name("BLOSUM62")
diagonal = [ matrix[i, i] for i in range(len(matrix)) ]
self.assertEqual(len(diagonal), 24)
self.assertEqual(diagonal, [4, 5, 6, 6, 9, 5, 5, 6, 8, 4, 4, 5, 5, 6, 7, 4, 5, 11, 7, 4, 4, 4, -1, 1])
def test_from_name_invalid_name(self):
with self.assertRaises(ValueError):
aa = ScoringMatrix.from_name("nonsensical")
def test_from_str(self):
m1 = ScoringMatrix.from_str(
"""
A T G C
A 5 -4 -4 -4
T -4 5 -4 -4
G -4 -4 5 -4
C -4 -4 -4 5
""".strip()
)
self.assertEqual(m1.alphabet, "ATGC")
self.assertEqual(m1['T', 'A'], -4.0)
self.assertEqual(m1['A', 'A'], 5.0)
m2 = ScoringMatrix.from_str(
"""
A T G C
5 -4 -4 -4
-4 5 -4 -4
-4 -4 5 -4
-4 -4 -4 5
""".strip()
)
self.assertEqual(m2.alphabet, "ATGC")
self.assertEqual(m2['T', 'A'], -4.0)
self.assertEqual(m2['A', 'A'], 5.0)
def test_from_diagonal(self):
m = ScoringMatrix.from_diagonal([1, 2, 3, 4], 0.0, alphabet="ATGC")
self.assertEqual(m[0], [1.0, 0.0, 0.0, 0.0])
self.assertEqual(m[1], [0.0, 2.0, 0.0, 0.0])
self.assertEqual(m[2], [0.0, 0.0, 3.0, 0.0])
self.assertEqual(m[3], [0.0, 0.0, 0.0, 4.0])
m = ScoringMatrix.from_diagonal([1, 2, 3, 4], -1.0, alphabet="ATGC")
self.assertEqual(m[0], [ 1.0, -1.0, -1.0, -1.0])
self.assertEqual(m[1], [-1.0, 2.0, -1.0, -1.0])
self.assertEqual(m[2], [-1.0, -1.0, 3.0, -1.0])
self.assertEqual(m[3], [-1.0, -1.0, -1.0, 4.0])
def test_from_diagonal_invalid_length(self):
self.assertRaises(
ValueError,
ScoringMatrix.from_diagonal,
[ 3, 3, 3, 3, 3, 3 ],
alphabet="ATGC"
)
self.assertRaises(
ValueError,
ScoringMatrix.from_diagonal,
[ 3, 3, 3 ],
alphabet="ATGC"
)
def test_list(self):
aa = ScoringMatrix.from_name("BLOSUM50")
matrix = list(aa)
columns = aa.alphabet
self.assertEqual(len(columns), 24)
self.assertEqual(len(matrix), 24)
for row in matrix:
self.assertEqual(len(row), 24)
@unittest.skipUnless(sys.implementation.name == "cpython", "memoryview not supported")
@unittest.skipUnless(sys.version_info >= (3, 9), "memoryview not supported")
def test_memoryview(self):
aa = ScoringMatrix.from_name("BLOSUM50")
mem = memoryview(aa)
self.assertEqual(mem.shape, (24, 24))
self.assertEqual(mem[0, 0], 5.0) # A <-> A
self.assertEqual(mem[6, 6], 6.0) # E <-> E
def test_init_empty(self):
m = ScoringMatrix([], alphabet="")
self.assertEqual(len(m), 0)
self.assertFalse(bool(m))
def test_init_invalid_length(self):
with self.assertRaises(ValueError):
m = ScoringMatrix(
[
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
],
alphabet="ATGC",
)
with self.assertRaises(ValueError):
m = ScoringMatrix(
[
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0],
],
alphabet="ATGC",
)
def test_eq(self):
sm1 = ScoringMatrix.from_name("BLOSUM50")
sm2 = ScoringMatrix.from_name("BLOSUM50")
sm3 = ScoringMatrix.from_name("BLOSUM62")
self.assertEqual(sm1, sm1)
self.assertEqual(sm1, sm2)
self.assertNotEqual(sm1, sm3)
self.assertNotEqual(sm1, 12)
def test_pickle(self):
sm1 = ScoringMatrix.from_name("BLOSUM62")
sm2 = pickle.loads(pickle.dumps(sm1))
self.assertEqual(sm1.alphabet, sm2.alphabet)
self.assertEqual(list(sm1), list(sm2))
def test_shuffle_invalid_alphabet(self):
matrix = ScoringMatrix.from_name("BLOSUM62")
self.assertRaises(KeyError, matrix.shuffle, "ARNJOU")
def test_shuffle_empty(self):
matrix = ScoringMatrix.from_name("BLOSUM62")
empty = matrix.shuffle("")
self.assertEqual(len(empty), 0)
self.assertFalse(bool(empty))
def test_is_symmetric(self):
matrix = ScoringMatrix.from_name("PAM250")
self.assertTrue(matrix.is_symmetric())
matrix = ScoringMatrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]], "ABC")
self.assertFalse(matrix.is_symmetric())
# Locate the Python interpreter and the development files needed to build
# extension modules.
find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
# Root folder of the Python sources, read from a global property.
# NOTE(review): the property is presumably set by a parent CMakeLists.txt.
get_property(PYTHON_EXTENSIONS_SOURCE_DIR GLOBAL PROPERTY PYTHON_EXTENSIONS_SOURCE_DIR)
# --- Detect PyInterpreterState_GetID ------------------------------------------
include(CheckSymbolExists)
# Temporarily add the Python include directories to the include path used by
# the symbol check, then restore the previous value.
set(SAFE_CMAKE_REQUIRED_INCLUDES "${CMAKE_REQUIRED_INCLUDES}")
set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${Python_INCLUDE_DIRS})
check_symbol_exists(PyInterpreterState_GetID "stdint.h;stdlib.h;Python.h" HAVE_PYINTERPRETERSTATE_GETID)
set(CMAKE_REQUIRED_INCLUDES "${SAFE_CMAKE_REQUIRED_INCLUDES}")
# --- Detect implementation ----------------------------------------------------
# Lower-cased interpreter identifier, passed to Cython as a compile-time
# constant below.
string(TOLOWER "${Python_INTERPRETER_ID}" SYS_IMPLEMENTATION_NAME)
# --- Prepare Cython directives and constants ----------------------------------
# `-X` sets a compiler directive, `-E` defines a compile-time constant.
set(CYTHON_DIRECTIVES
  -X cdivision=True
  -X nonecheck=False
  -E SYS_IMPLEMENTATION_NAME=${SYS_IMPLEMENTATION_NAME}
  -E SYS_VERSION_INFO_MAJOR=${Python_VERSION_MAJOR}
  -E SYS_VERSION_INFO_MINOR=${Python_VERSION_MINOR}
  -E PROJECT_VERSION=${CMAKE_PROJECT_VERSION}
)
if(CMAKE_BUILD_TYPE STREQUAL Debug)
  # Debug builds: enable the Cython warnings.
  set(CYTHON_DIRECTIVES
    ${CYTHON_DIRECTIVES}
    -X cdivision_warnings=True
    -X warn.undeclared=True
    -X warn.unreachable=True
    -X warn.maybe_uninitialized=True
    -X warn.unused=True
    -X warn.unused_arg=True
    -X warn.unused_result=True
    -X warn.multiple_declarators=True
  )
  # Line tracing is only requested when the interpreter is not PyPy.
  if(NOT Python_INTERPRETER_ID STREQUAL PyPy)
    set(CYTHON_DIRECTIVES
      ${CYTHON_DIRECTIVES}
      -X linetrace=true
    )
  endif()
else()
  # Other build types: disable bounds checking and negative-index wraparound.
  set(CYTHON_DIRECTIVES
    ${CYTHON_DIRECTIVES}
    -X boundscheck=False
    -X wraparound=False
  )
endif()
# --- Declare Cython extension -------------------------------------------------
# cython_extension(<name> [LINKS ...] [DIRECTIVES ...] [SOURCES ...] [OPTIONS ...])
#
# Declare a Python extension module built from `<name>.pyx` in the current
# source directory:
#   LINKS       libraries to link the module against
#   DIRECTIVES  extra arguments passed to the Cython command line
#   SOURCES     additional source files compiled into the module
#   OPTIONS     extra compile options for the module
macro(cython_extension _name)
  set(multiValueArgs LINKS DIRECTIVES SOURCES OPTIONS)
  cmake_parse_arguments(CYTHON_EXTENSION "" "" "${multiValueArgs}" ${ARGN} )

  # Make sure that the source directory is known
  if(NOT DEFINED PYTHON_EXTENSIONS_SOURCE_DIR)
    message(FATAL_ERROR "The PYTHON_EXTENSIONS_SOURCE_DIR variable has not been set.")
  endif()

  # Generate C file from Cython file
  add_custom_command(
    OUTPUT
      ${_name}.c
    COMMENT
      "Cythonizing ${_name}.pyx to ${_name}.c"
    COMMAND
      Python::Interpreter -m cython
        "${CMAKE_CURRENT_SOURCE_DIR}/${_name}.pyx"
        --output-file ${_name}.c
        --depfile
        -I "${CYTHON_HEADERS_DIR}"
        ${CYTHON_DIRECTIVES}
        ${CYTHON_EXTENSION_DIRECTIVES}
    MAIN_DEPENDENCY
      ${_name}.pyx
    DEPFILE
      ${_name}.c.dep
    VERBATIM)

  # Build fully-qualified module name as the target name; the relative
  # folder is reused below as the install destination
  string(REGEX REPLACE "^${PYTHON_EXTENSIONS_SOURCE_DIR}/?" "" _dest_folder ${CMAKE_CURRENT_SOURCE_DIR})
  string(REPLACE "/" "." _target ${_dest_folder}.${_name})

  # Add Python module
  set(EXTENSION_SOURCES ${_name}.pyx ${_name}.c)
  # `if(EXISTS)` is only well-defined for full paths, so the check must be
  # anchored to the source directory instead of using a bare relative name
  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_name}.pxd")
    set(EXTENSION_SOURCES ${EXTENSION_SOURCES} ${_name}.pxd)
  endif()
  if(CYTHON_EXTENSION_SOURCES)
    set(EXTENSION_SOURCES ${EXTENSION_SOURCES} ${CYTHON_EXTENSION_SOURCES})
  endif()
  python_add_library(${_target} MODULE WITH_SOABI ${EXTENSION_SOURCES})
  set_target_properties(${_target} PROPERTIES OUTPUT_NAME ${_name})
  set_target_properties(${_target} PROPERTIES CXX_STANDARD 17)
  target_include_directories(${_target} AFTER PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
  target_link_libraries(${_target} PUBLIC ${CYTHON_EXTENSION_LINKS})
  if(CYTHON_EXTENSION_OPTIONS)
    target_compile_options(${_target} PUBLIC ${CYTHON_EXTENSION_OPTIONS})
  endif()
  if(HAVE_PYINTERPRETERSTATE_GETID)
    target_compile_definitions(${_target} PUBLIC HAVE_PYINTERPRETERSTATE_GETID)
  endif()

  # Debug builds enable tracing of nogil code (except on PyPy); other
  # builds compile the Cython assertions out
  if(CMAKE_BUILD_TYPE STREQUAL Debug)
    if(NOT Python_INTERPRETER_ID STREQUAL PyPy)
      target_compile_definitions(${_target} PUBLIC CYTHON_TRACE_NOGIL=1)
    endif()
  else()
    target_compile_definitions(${_target} PUBLIC CYTHON_WITHOUT_ASSERTIONS=1)
  endif()

  # Preserve the relative project structure in the install directory
  install(TARGETS ${_target} DESTINATION ${_dest_folder} )
  message(DEBUG "Install folder for extension ${_name}: ${_dest_folder}")

  # Add the targets to the list of Cython extensions
  get_property(_ext GLOBAL PROPERTY PYRODIGAL_CYTHON_EXTENSIONS)
  list(APPEND _ext ${_target})
  set_property(GLOBAL PROPERTY PYRODIGAL_CYTHON_EXTENSIONS ${_ext})
endmacro()
import argparse
import itertools
import os
def _parse_matrix_file(matrix_file):
with open(matrix_file) as f:
lines = filter(
lambda line: line and not line.startswith("#"),
map(str.strip, f),
)
letters = ''.join(next(lines).split())
matrix = [
list(map(float, line.strip().split()[1:]))
for line in map(str.strip, lines)
if line
]
return letters, matrix
def _generate_matrices(matrix_files, output_file):
    """Write a C source fragment embedding all the given matrix files.

    Each input file is parsed with `_parse_matrix_file` and registered
    under its upper-cased base name (extension removed). The output
    declares four arrays sorted by matrix name: ``_NAMES``, ``_ALPHABETS``
    and ``_MATRICES`` (each ending with ``NULL``), and ``_SIZES`` (ending
    with ``-1``), along with one ``_MATRIX_<id>`` float array per matrix.
    """
    matrices = {}
    for path in matrix_files:
        stem, _ext = os.path.splitext(os.path.basename(path))
        matrices[stem.upper()] = _parse_matrix_file(path)

    with open(output_file, "w") as dst:
        dst.write("#include <stddef.h>\n")

        names = sorted(matrices)
        # dots are not allowed in C identifiers
        ids = [name.replace(".", "_") for name in names]
        # every table has one extra slot for its trailing sentinel
        slots = len(names) + 1

        dst.write(f"const char* _NAMES[{slots}] = {{")
        dst.write("".join(f'"{name}", ' for name in names))
        dst.write("NULL };\n")

        dst.write(f"const char* _ALPHABETS[{slots}] = {{")
        dst.write("".join(f'"{matrices[name][0]}", ' for name in names))
        dst.write("NULL };\n")

        dst.write(f"const size_t _SIZES[{slots}] = {{")
        dst.write("".join(f'{len(matrices[name][0])}, ' for name in names))
        dst.write("-1 };\n")

        for name, id_ in zip(names, ids):
            matrix = matrices[name][1]
            nitems = len(matrix) ** 2
            flat = itertools.chain.from_iterable(matrix)
            dst.write(f"float _MATRIX_{id_}[{nitems}] = {{")
            dst.write(", ".join(f"{item!r}F" for item in flat))
            dst.write("};\n")

        dst.write(f"const float* _MATRICES[{slots}] = {{")
        dst.write("".join(f'_MATRIX_{id_}, ' for id_ in ids))
        dst.write("NULL };\n")
# Command-line entry point: `--output` is the path of the file to write,
# and the positional `inputs` are one or more matrix files to embed.
parser = argparse.ArgumentParser()
parser.add_argument("--output", required=True)
parser.add_argument("inputs", nargs="+")
args = parser.parse_args()
# generate the output file from all the given matrix files
_generate_matrices(args.inputs, args.output)
+31
-14
Metadata-Version: 2.1
Name: scoring-matrices
Version: 0.2.2
Version: 0.3.0rc1
Summary: Dependency free, Cython-compatible scoring matrices to use with biological sequences.
Home-page: https://github.com/althonos/score-matrices
Author: Martin Larralde
Author-email: martin.larralde@embl.de
License: MIT
Project-URL: Bug Tracker, https://github.com/althonos/scoring-matrices/issues
Project-URL: Changelog, https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md
Project-URL: Coverage, https://codecov.io/gh/althonos/scoring-matrices/
Project-URL: Builds, https://github.com/althonos/scoring-matrices/actions
Keywords: bioinformatics,sequence,substitution,matrix,score
Platform: any
Classifier: Development Status :: 3 - Alpha
Author-Email: Martin Larralde <martin.larralde@embl.de>
License: MIT License
Copyright (c) 2024 Martin Larralde <martin.larralde@embl.de>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers

@@ -22,3 +35,2 @@ Classifier: Intended Audience :: Science/Research

Classifier: Programming Language :: Cython
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7

@@ -34,5 +46,10 @@ Classifier: Programming Language :: Python :: 3.8

Classifier: Typing :: Typed
Requires-Python: >=3.5
Project-URL: Documentation, https://scoring-matrices.readthedocs.io/en/stable/
Project-URL: Bug tracker, https://github.com/althonos/scoring-matrices/issues
Project-URL: Changelog, https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md
Project-URL: Coverage, https://codecov.io/gh/althonos/scoring-matrices/
Project-URL: Builds, https://github.com/althonos/scoring-matrices/actions
Project-URL: Pypi, https://pypi.org/project/scoring-matrices
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: COPYING

@@ -39,0 +56,0 @@ # 🧬🔠 `scoring-matrices` [![Stars](https://img.shields.io/github/stars/althonos/scoring-matrices.svg?style=social&maxAge=3600&label=Star)](https://github.com/althonos/scoring-matrices/stargazers)

[build-system]
requires = ['setuptools >=46.4', 'cython ~=3.0']
build-backend = "setuptools.build_meta"
requires = ["scikit-build-core", "cython >=3.0"]
build-backend = "scikit_build_core.build"
[project]
name = "scoring-matrices"
version = "0.3.0-rc1"
description = "Dependency free, Cython-compatible scoring matrices to use with biological sequences."
readme = "README.md"
requires-python = ">=3.7"
license = { file = "COPYING" }
authors = [
{ name = "Martin Larralde", email = "martin.larralde@embl.de" },
]
keywords = ["bioinformatics", "sequence", "substitution", "matrix", "score"]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: C",
"Programming Language :: Cython",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Typing :: Typed",
]
[project.urls]
"Documentation" = "https://scoring-matrices.readthedocs.io/en/stable/"
"Bug Tracker" = "https://github.com/althonos/scoring-matrices/issues"
"Changelog" = "https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md"
"Coverage" = "https://codecov.io/gh/althonos/scoring-matrices/"
"Builds" = "https://github.com/althonos/scoring-matrices/actions"
"PyPI" = "https://pypi.org/project/scoring-matrices"
[tool.scikit-build]
build-dir = "build/{build_type}"
editable.rebuild = true
editable.verbose = false
[[tool.scikit-build.generate]]
path = "src/scoring_matrices/_version.py"
template = '__version__ = "${version}"'
location = 'source'
[[tool.scikit-build.overrides]]
if.state = "editable"
cmake.build-type = "Debug"
# Overrides must live in the `tool.scikit-build.overrides` array of tables;
# a `tool.scikit-build-overrides` table is a different key that
# scikit-build-core does not read, so these settings were never applied.
[[tool.scikit-build.overrides]]
if.env.SCCACHE = true
cmake.define.CMAKE_C_COMPILER_LAUNCHER = "sccache"
cmake.define.CMAKE_CXX_COMPILER_LAUNCHER = "sccache"
[[tool.scikit-build.overrides]]
if.env.MOLD = true
cmake.define.CMAKE_LINKER_TYPE = "mold"
#[tool.coverage.run]
#plugins = ["Cython.Coverage"]
[tool.coverage.report]
include = ["src/scoring_matrices/*"]
omit = ["src/scoring_matrices/tests/*"]
show_missing = true
exclude_lines = [
"pragma: no cover",
"if typing.TYPE_CHECKING:",
"@abc.abstractmethod",
"@abc.abstractproperty",
"raise NotImplementedError",
"return NotImplemented",
"raise UnexpectedError",
"raise AllocationError",
]
[tool.coverage.paths]
source = [
"src/scoring_matrices/",
"build/Debug/src/scoring_matrices/",
]
[tool.mypy]
disallow_any_decorated = true
disallow_any_generics = true
disallow_any_unimported = false
disallow_subclassing_any = false
disallow_untyped_calls = true
disallow_untyped_defs = true
ignore_missing_imports = true
warn_unused_ignores = true
warn_return_any = true
exclude = ["tests/.*\\.py"]
# Per-module mypy options in `pyproject.toml` go in `tool.mypy.overrides`
# with a `module` pattern; the quoted INI-style section name created an
# unrelated top-level table that mypy ignores.
[[tool.mypy.overrides]]
module = "scoring_matrices.tests.*"
ignore_errors = true
[tool.isort]
known_first_party = ["scoring_matrices"]
known_standard_library = ["cpython", "cython", "_unicode"]
known_third_party = ["libc", "libcpp"]
line_length = 88
profile = "black"
skip_gitignore = true

Sorry, the diff of this file is not supported yet

Metadata-Version: 2.1
Name: scoring-matrices
Version: 0.2.2
Summary: Dependency free, Cython-compatible scoring matrices to use with biological sequences.
Home-page: https://github.com/althonos/score-matrices
Author: Martin Larralde
Author-email: martin.larralde@embl.de
License: MIT
Project-URL: Bug Tracker, https://github.com/althonos/scoring-matrices/issues
Project-URL: Changelog, https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md
Project-URL: Coverage, https://codecov.io/gh/althonos/scoring-matrices/
Project-URL: Builds, https://github.com/althonos/scoring-matrices/actions
Keywords: bioinformatics,sequence,substitution,matrix,score
Platform: any
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: C
Classifier: Programming Language :: Cython
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
Classifier: Typing :: Typed
Requires-Python: >=3.5
Description-Content-Type: text/markdown
License-File: COPYING
# 🧬🔠 `scoring-matrices` [![Stars](https://img.shields.io/github/stars/althonos/scoring-matrices.svg?style=social&maxAge=3600&label=Star)](https://github.com/althonos/scoring-matrices/stargazers)
*Dependency free, [Cython](https://cython.org/)-compatible scoring matrices to use with biological sequences.*
[![Actions](https://img.shields.io/github/actions/workflow/status/althonos/scoring-matrices/test.yml?branch=main&logo=github&style=flat-square&maxAge=300)](https://github.com/althonos/scoring-matrices/actions)
[![Coverage](https://img.shields.io/codecov/c/gh/althonos/scoring-matrices?style=flat-square&maxAge=3600&logo=codecov)](https://codecov.io/gh/althonos/scoring-matrices/)
[![License](https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square&maxAge=2678400)](https://choosealicense.com/licenses/mit/)
[![PyPI](https://img.shields.io/pypi/v/scoring-matrices.svg?style=flat-square&maxAge=3600&logo=PyPI)](https://pypi.org/project/scoring-matrices)
[![Bioconda](https://img.shields.io/conda/vn/bioconda/scoring-matrices?style=flat-square&maxAge=3600&logo=anaconda)](https://anaconda.org/bioconda/scoring-matrices)
[![AUR](https://img.shields.io/aur/version/python-scoring-matrices?logo=archlinux&style=flat-square&maxAge=3600)](https://aur.archlinux.org/packages/python-scoring-matrices)
[![Wheel](https://img.shields.io/pypi/wheel/scoring-matrices.svg?style=flat-square&maxAge=3600)](https://pypi.org/project/scoring-matrices/#files)
[![Python Versions](https://img.shields.io/pypi/pyversions/scoring-matrices.svg?style=flat-square&maxAge=600&logo=python)](https://pypi.org/project/scoring-matrices/#files)
[![Python Implementations](https://img.shields.io/pypi/implementation/scoring-matrices.svg?style=flat-square&maxAge=600&label=impl)](https://pypi.org/project/scoring-matrices/#files)
[![Source](https://img.shields.io/badge/source-GitHub-303030.svg?maxAge=2678400&style=flat-square)](https://github.com/althonos/scoring-matrices/)
[![Issues](https://img.shields.io/github/issues/althonos/scoring-matrices.svg?style=flat-square&maxAge=600)](https://github.com/althonos/scoring-matrices/issues)
[![Docs](https://img.shields.io/readthedocs/scoring-matrices/latest?style=flat-square&maxAge=600)](https://scoring-matrices.readthedocs.io)
[![Changelog](https://img.shields.io/badge/keep%20a-changelog-8A0707.svg?maxAge=2678400&style=flat-square)](https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md)
[![Downloads](https://img.shields.io/pypi/dm/scoring-matrices?style=flat-square&color=303f9f&maxAge=86400&label=downloads)](https://pepy.tech/project/scoring-matrices)
## 🗺️ Overview
*Scoring Matrices* are matrices used to score the matches and mismatches between
two characters at the same position in a sequence alignment. Some of these
matrices are derived from *substitution matrices*, which use evolutionary
modeling.
The `scoring-matrices` package is a dependency-free, batteries included library
to handle and distribute common substitution matrices:
- **no external dependencies**: The matrices are distributed as-is: you don't
need the whole [Biopython](https://biopython.org) ecosystem, or even
[NumPy](https://numpy.org/).
- **Cython compatibility**: The `ScoringMatrix` is a Cython class that can be
inherited, and the matrix data can be accessed as either a raw pointer, or
a [typed memoryview](https://cython.readthedocs.io/en/latest/src/userguide/memoryviews.html).
- **most common matrices**: The package distributes most common matrices, such as
those used by the NCBI BLAST+ suite, including:
- [*PAM*](https://en.wikipedia.org/wiki/Point_accepted_mutation#) matrices by Dayhoff *et al.* (1978).
- [*BLOSUM*](https://en.wikipedia.org/wiki/BLOSUM) matrices by Henikoff & Henikoff (1992).
- *VTML* matrices by Muller *et al.* (2002).
- *BENNER* matrices by Benner *et al.* (1994).
## 🔧 Installing
`scoring-matrices` can be installed directly from [PyPI](https://pypi.org/project/scoring-matrices/),
which hosts some pre-built wheels for the x86-64 architecture (Linux/OSX/Windows)
and the Aarch64 architecture (Linux/OSX), as well as the code required to
compile from source with Cython:
```console
$ pip install scoring-matrices
```
Otherwise, `scoring-matrices` is also available as a [Bioconda](https://bioconda.github.io/)
package:
```console
$ conda install bioconda::scoring-matrices
```
## 💡 Usage
### Python
- Import the `ScoringMatrix` class from the installed module:
```python
from scoring_matrices import ScoringMatrix
```
- Load one of the built-in matrices:
```python
blosum62 = ScoringMatrix.from_name("BLOSUM62")
```
- Get individual matrix weights either by index or by alphabet letter:
```python
x = blosum62[0, 0]
y = blosum62['A', 'A']
```
- Get a row of the matrix either by index or by alphabet letter:
```python
row_x = blosum62[0]
row_y = blosum62['A']
```
### Cython
- Access the matrix weights as raw pointers to constant data:
```cython
from scoring_matrices cimport ScoringMatrix
cdef ScoringMatrix blosum = ScoringMatrix.from_name("BLOSUM62")
cdef const float* data = blosum.data_ptr() # dense array
cdef const float** matrix = blosum.matrix_ptr() # array of pointers
```
- Access the `ScoringMatrix` weights as a [typed memoryview](https://cython.readthedocs.io/en/latest/src/userguide/memoryviews.html)
using the *buffer protocol* in more recent versions of Python:
```cython
from scoring_matrices cimport ScoringMatrix
cdef ScoringMatrix blosum = ScoringMatrix.from_name("BLOSUM62")
cdef const float[:, :] weights = blosum
```
## 💭 Feedback
### ⚠️ Issue Tracker
Found a bug? Have an enhancement request? Head over to the [GitHub issue tracker](https://github.com/althonos/scoring-matrices/issues)
if you need to report or ask something. If you are filing a bug report,
please include as much information as you can about the issue, and try to
recreate the same bug in a simple, easily reproducible situation.
### 🏗️ Contributing
Contributions are more than welcome! See
[`CONTRIBUTING.md`](https://github.com/althonos/scoring-matrices/blob/main/CONTRIBUTING.md)
for more details.
## 📋 Changelog
This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html)
and provides a [changelog](https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md)
in the [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) format.
## ⚖️ License
This library is provided under the [MIT License](https://choosealicense.com/licenses/mit/).
Matrices were collected from the [MMseqs2](https://github.com/soedinglab/MMseqs2),
[Biopython](https://github.com/biopython/biopython/tree/master/Bio/Align/substitution_matrices/data)
and [NCBI BLAST+](https://ftp.ncbi.nih.gov/blast/matrices/) sources and are believed to
be in the public domain.
*This project was developed by [Martin Larralde](https://github.com/althonos/)
during his PhD project at the [Leiden University Medical Center](https://www.lumc.nl/en/) in the [Zeller team](https://github.com/zellerlab).*
COPYING
README.md
pyproject.toml
setup.cfg
setup.py
scoring_matrices/__init__.pxd
scoring_matrices/__init__.py
scoring_matrices/lib.pxd
scoring_matrices/lib.pyi
scoring_matrices/lib.pyx
scoring_matrices/matrices.h
scoring_matrices/matrices.pxd
scoring_matrices/py.typed
scoring_matrices.egg-info/PKG-INFO
scoring_matrices.egg-info/SOURCES.txt
scoring_matrices.egg-info/dependency_links.txt
scoring_matrices.egg-info/not-zip-safe
scoring_matrices.egg-info/top_level.txt
scoring_matrices/tests/__init__.py
scoring_matrices/tests/test_doctest.py
scoring_matrices/tests/test_scoring_matrix.py
from .lib cimport ScoringMatrix
# Package metadata.
__version__ = "0.2.2"
__author__ = "Martin Larralde <martin.larralde@embl.de>"
__license__ = "MIT"
# Re-export the `ScoringMatrix` class at the package level.
from . import lib
from .lib import ScoringMatrix
# The package documentation is the documentation of the `lib` module.
__doc__ = lib.__doc__
# distutils: language = c
# cython: language_level=3, linetrace=True, binding=True
# C-level declaration of the `ScoringMatrix` extension type, allowing
# other Cython modules to `cimport` it.
cdef class ScoringMatrix:
    # attributes readable from Python
    cdef readonly str name
    cdef readonly str alphabet
    # matrix dimension, and number of items in `_data`
    cdef size_t _size
    cdef size_t _nitems
    # two-element shape array
    # NOTE(review): presumably exposed through the buffer protocol; confirm
    cdef Py_ssize_t[2] _shape
    # scores as a flat array (`_data`) and as an array of pointers (`_matrix`)
    cdef float* _data
    cdef float** _matrix
    # C string counterpart of `alphabet`
    # NOTE(review): ownership and encoding are not visible here
    cdef char* _alphabet
    # --- C-only methods, callable without the GIL -----------------------------
    cdef int _allocate(self, size_t length) except 1 nogil
    cdef const float* data_ptr(self) except NULL nogil
    cdef const float** matrix_ptr(self) except NULL nogil
    cdef const char* alphabet_ptr(self) except NULL nogil
    cdef size_t size(self) noexcept nogil
    # --- Methods callable from both C and Python ------------------------------
    cpdef bint is_integer(self)
    cpdef bint is_symmetric(self)
    cpdef float min(self)
    cpdef float max(self)
    cpdef ScoringMatrix copy(self)
    cpdef ScoringMatrix shuffle(self, str alphabet)
import typing
from typing import TypeVar, Type, Optional, TextIO, Sequence, ClassVar, Tuple, List
S = TypeVar("S")
class ScoringMatrix:
    """Type stub for the `ScoringMatrix` extension class."""

    # default alphabet used by the constructors
    DEFAULT_ALPHABET: ClassVar[str]
    # names accepted by `from_name`
    BUILTIN_MATRICES: ClassVar[typing.FrozenSet[str]]

    # --- Constructors ---------------------------------------------------------

    @classmethod
    def from_name(cls: Type[S], name: str = "BLOSUM62") -> S: ...
    @classmethod
    def from_file(cls: Type[S], file: TextIO, name: Optional[str] = None) -> S: ...
    @classmethod
    def from_str(cls: Type[S], text: str, name: Optional[str] = None) -> S: ...
    @classmethod
    def from_diagonal(
        cls: Type[S],
        # `Iterable` is not imported by name in this stub, so it is
        # referenced through the `typing` module
        diagonal: typing.Iterable[float],
        mismatch_score: float = 0.0,
        alphabet: str = DEFAULT_ALPHABET,
        name: Optional[str] = None,
    ) -> S: ...
    @classmethod
    def from_match_mismatch(
        cls: Type[S],
        match_score: float = 1.0,
        mismatch_score: float = -0.0,
        alphabet: str = DEFAULT_ALPHABET,
        name: Optional[str] = None,
    ) -> S: ...

    # --- Magic methods --------------------------------------------------------

    def __init__(
        self,
        matrix: Sequence[Sequence[float]],
        alphabet: str = DEFAULT_ALPHABET,
        name: Optional[str] = None,
    ) -> None: ...
    def __copy__(self: S) -> S: ...
    def __repr__(self) -> str: ...
    def __reduce_ex__(self, protocol: object) -> Tuple[object, ...]: ...
    def __len__(self) -> int: ...
    def __eq__(self, other: object) -> bool: ...

    # --- Properties -----------------------------------------------------------

    @property
    def alphabet(self) -> str: ...
    @property
    def name(self) -> Optional[str]: ...

    # --- Item access: a single key gives a row, a pair gives one score --------

    @typing.overload
    def __getitem__(self, item: int) -> List[float]: ...
    @typing.overload
    def __getitem__(self, item: str) -> List[float]: ...
    @typing.overload
    def __getitem__(self, item: Tuple[int, int]) -> float: ...
    @typing.overload
    def __getitem__(self, item: Tuple[str, str]) -> float: ...

    # --- Methods --------------------------------------------------------------

    def copy(self: S) -> S: ...
    def is_integer(self) -> bool: ...
    def is_symmetric(self) -> bool: ...
    def min(self) -> float: ...
    def max(self) -> float: ...
    def shuffle(self: S, alphabet: str) -> S: ...
# distutils: language = c
# cython: language_level=3, linetrace=True, binding=True
"""Dependency free, Cython-compatible scoring matrices for bioinformatics.
"""
cimport cython
from cpython.memoryview cimport PyMemoryView_FromMemory
from cpython.buffer cimport PyBUF_FORMAT, PyBUF_READ, PyBUF_WRITE
from libc.math cimport INFINITY, lrintf
from libc.stdlib cimport realloc, free
from libc.string cimport memcpy, memset
from .matrices cimport _NAMES, _ALPHABETS, _SIZES, _MATRICES
import io
import pickle
# Map the name of each embedded matrix to its index in the C tables.
# `sizeof(_NAMES) / sizeof(const char*)` is the number of slots in `_NAMES`;
# the last slot is excluded.
# NOTE(review): presumably because it holds a `NULL` sentinel; confirm
# against the generated "matrices.h".
cdef dict _INDICES = {
    _NAMES[i].decode('ascii'):i
    for i in range(sizeof(_NAMES) /sizeof(const char*) - 1)
}
cdef class ScoringMatrix:
"""A scoring matrix to use for biological sequence alignments.
"""
DEFAULT_ALPHABET = "ARNDCQEGHILKMFPSTWYVBZX*"
BUILTIN_MATRICES = frozenset(_INDICES)
# --- Constructors ---------------------------------------------------------
@classmethod
def from_name(cls, str name not None = "BLOSUM62"):
    """Create a scoring matrix from one of the embedded matrices.

    Arguments:
        name (`str`): The name of the scoring matrix to load.

    Returns:
        `ScoringMatrix`: A new matrix built from the embedded data, with
        the alphabet of the embedded matrix and the requested ``name``.

    Raises:
        `ValueError`: When no scoring matrix with the given ``name``
            can be found in the embedded matrix data.

    Example:
        >>> blosum62 = ScoringMatrix.from_name("BLOSUM62")

    """
    cdef size_t index
    cdef size_t size
    cdef size_t start
    cdef size_t offset
    cdef list   rows
    cdef list   row

    try:
        index = _INDICES[name]
    except KeyError as err:
        raise ValueError(f"Unknown scoring matrix: {name!r}") from err

    alphabet = _ALPHABETS[index].decode('ascii')
    size = _SIZES[index]

    # the embedded scores are a flat array: cut it into rows of `size` items
    rows = []
    for start in range(0, size * size, size):
        row = []
        for offset in range(size):
            row.append(_MATRICES[index][start + offset])
        rows.append(row)

    return cls(rows, alphabet=alphabet, name=name)
@classmethod
def from_file(cls, object file, str name = None):
    """Load a scoring matrix from a file-like object.

    The first meaningful line is the header listing the alphabet
    letters; each following line holds one row of scores, optionally
    prefixed by the letter of that row. Blank lines and lines starting
    with ``#`` are ignored.

    Arguments:
        file (iterable of `str`): The text lines to parse, typically
            a file opened in text mode.
        name (`str` or `None`): A name for the scoring matrix, if any.

    Raises:
        `ValueError`: When the header is missing, when a row label
            does not match the alphabet, when a score is not a number,
            or when the matrix does not match the alphabet.

    """
    # ignore blank lines and lines with comments; blank lines used to
    # crash with an `IndexError` when reading the first column
    lines = (
        line
        for line in file
        if line.strip() and not line.startswith("#")
    )
    # get the header line with the alphabet
    header = next(lines, None)
    if header is None:
        raise ValueError("Missing expected header line")
    alphabet = ''.join(header.split())
    # get the numerical matrix
    matrix = []
    for i, line in enumerate(lines):
        row = line.split()
        try:
            float(row[0])
        except ValueError:
            # the first column is a row label: it must be the letter at
            # the same position in the header (there is none to expect
            # when the file has more rows than header letters)
            expected = alphabet[i] if i < len(alphabet) else None
            if row[0] != expected:
                raise ValueError(f"Matrix must be symmetric (expected row {expected!r}, got {row[0]!r})") from None
            row = row[1:]
        matrix.append(list(map(float, row)))
    # create the object with default constructor
    return cls(matrix, alphabet=alphabet, name=name)
@classmethod
def from_str(cls, str text, str name = None):
    """Load a scoring matrix from a string.

    Arguments:
        text (`str`): The matrix, in the format accepted by `from_file`.
        name (`str` or `None`): A name for the scoring matrix, if any.

    Raises:
        `ValueError`: When ``text`` cannot be parsed by `from_file`.

    """
    # forward `name`, which used to be silently discarded
    return cls.from_file(io.StringIO(text), name=name)
@classmethod
def from_diagonal(
    cls,
    object diagonal,
    float mismatch_score=0.0,
    str alphabet not None = DEFAULT_ALPHABET,
    str name = None
):
    """Build a scoring matrix given only its diagonal.

    Arguments:
        diagonal (sequence of `float`): The scores given to each
            character when it is matched with itself.
        mismatch_score (`float`): The score written in every cell
            outside of the diagonal.
        alphabet (`str`): The alphabet to use with the scoring matrix.
        name (`str` or `None`): A name for the scoring matrix, if any.

    Example:
        >>> matrix = ScoringMatrix.from_diagonal(
        ...     diagonal=[2, 2, 3, 3],
        ...     mismatch_score=-3.0,
        ...     alphabet="ATGC",
        ... )
        >>> for row in matrix:
        ...     print(row)
        [2.0, -3.0, -3.0, -3.0]
        [-3.0, 2.0, -3.0, -3.0]
        [-3.0, -3.0, 3.0, -3.0]
        [-3.0, -3.0, -3.0, 3.0]

    .. versionadded:: 0.2.0

    """
    cdef size_t width = len(alphabet)
    # one row per diagonal entry; the constructor validates that the
    # number of rows agrees with the alphabet
    cdef list rows = [
        [score if column == position else mismatch_score for column in range(width)]
        for position, score in enumerate(diagonal)
    ]
    return cls(rows, alphabet=alphabet, name=name)
@classmethod
def from_match_mismatch(
    cls,
    float match_score = 1.0,
    float mismatch_score = -1.0,
    str alphabet not None = DEFAULT_ALPHABET,
    str name = None,
):
    """Build a scoring matrix out of a match and a mismatch score.

    Arguments:
        match_score (`float`): The score written on the diagonal.
        mismatch_score (`float`): The score written everywhere else.
        alphabet (`str`): The alphabet to use with the scoring matrix.
        name (`str` or `None`): A name for the scoring matrix, if any.

    .. versionadded:: 0.2.0

    """
    cdef size_t width = len(alphabet)
    cdef list rows = []
    for position in range(width):
        # start from a row of mismatches and patch the diagonal cell
        row = [mismatch_score] * width
        row[position] = match_score
        rows.append(row)
    return cls(rows, alphabet=alphabet, name=name)
# --- Magic methods --------------------------------------------------------
def __cinit__(self):
    # start from an empty, unallocated state: `_allocate` hands these
    # pointers to `realloc`, which treats NULL as "nothing allocated yet"
    self._data = NULL
    self._matrix = NULL
    self._alphabet = NULL
    self._size = 0
    self._nitems = 0
    self._shape[0] = self._shape[1] = 0

def __dealloc__(self):
    # release the buffers obtained through `realloc` in `_allocate`, which
    # were otherwise never freed; `free(NULL)` is a no-op, so this is safe
    # for objects that were never initialized
    free(self._data)
    free(self._matrix)
    free(self._alphabet)
def __init__(
    self,
    object matrix not None,
    str alphabet not None = DEFAULT_ALPHABET,
    str name = None,
):
    """Create a new scoring matrix.

    Arguments:
        matrix (array-like of `float`): A square matrix with dimensions
            equal to the ``alphabet`` length, storing the scores for each
            pair of characters.
        alphabet (`str`): The alphabet used to index the rows and columns
            of the scoring matrix.
        name (`str` or `None`): The name of the scoring matrix, if any.

    Example:
        >>> matrix = ScoringMatrix(
        ...     [[91, -114, -31, -123],
        ...      [-114, 100, -125, -31],
        ...      [-31, -125, 100, -114],
        ...      [-123, -31, -114, 91]],
        ...     alphabet="ACGT",
        ...     name="HOXD70",
        ... )

    Raises:
        `ValueError`: When the matrix is not a valid matrix or does not
            match the given alphabet.
        `MemoryError`: When memory for storing the scores could not be
            allocated successfully.

    """
    cdef ssize_t i
    cdef ssize_t j
    cdef float x
    cdef size_t size = len(alphabet)

    # validate the arguments before touching the object state; the error
    # message reports the `alphabet` argument, since `self.alphabet` has
    # not been assigned at this point
    if len(alphabet) != len(set(alphabet)):
        raise ValueError(f"Duplicate letters found in alphabet: {alphabet!r}")
    if len(matrix) != size:
        raise ValueError("Matrix must contain one row per alphabet letter")

    self.alphabet = alphabet
    self.name = name
    with nogil:
        self._allocate(size)

    # keep a C copy of the alphabet next to the Python string
    for i, c in enumerate(self.alphabet):
        self._alphabet[i] = ord(c)

    # copy the scores row by row, checking that every row is complete
    for i, row in enumerate(matrix):
        if len(row) != size:
            raise ValueError("Matrix must contain one column per alphabet letter")
        for j, x in enumerate(row):
            self._matrix[i][j] = x
def __copy__(self):
    # support for `copy.copy`: delegate to the public `copy` method
    return self.copy()
def __repr__(self):
    # render the object as a constructor call, leaving out the keyword
    # arguments that still have their default value
    cdef str class_name = type(self).__name__
    cdef list pieces = [repr(list(self))]
    if self.alphabet != ScoringMatrix.DEFAULT_ALPHABET:
        pieces.append(f"alphabet={self.alphabet!r}")
    if self.name is not None:
        pieces.append(f"name={self.name!r}")
    arguments = ', '.join(pieces)
    return f"{class_name}({arguments})"
def __reduce_ex__(self, int protocol):
    # Support for `pickle`: rebuild the object by calling the constructor
    # with the scores, the alphabet and the name.
    assert self._data != NULL
    assert self._matrix != NULL
    # Always serialize the scores as nested lists. A `pickle.PickleBuffer`
    # (protocol 5) pickled in-band is restored as a flat `bytes` object,
    # which the constructor rejects because its length is not the number
    # of rows, so pickles created with protocol 5 could not be loaded.
    matrix = list(self)
    return (type(self), (matrix, self.alphabet, self.name))
def __getbuffer__(self, Py_buffer* buffer, int flags):
    # Buffer protocol export: expose `self._data` as a two-dimensional
    # view of C floats, using the dimensions stored in `self._shape`.
    assert self._data != NULL
    # only report the item format when the consumer requested it
    if flags & PyBUF_FORMAT:
        buffer.format = b"f"
    else:
        buffer.format = NULL
    buffer.buf = self._data
    buffer.internal = NULL
    buffer.itemsize = sizeof(float)
    buffer.len = self._nitems * sizeof(float)
    buffer.ndim = 2
    # the view points back at this object
    buffer.obj = self
    # NOTE(review): the view is always flagged read-only and `flags` is not
    # checked for a writable request -- confirm this is intended.
    buffer.readonly = 1
    buffer.shape = <Py_ssize_t*> &self._shape
    buffer.suboffsets = NULL
    # no explicit strides are provided
    buffer.strides = NULL
def __len__(self):
    # the length is `self._size`, which `_allocate` sets to the size it
    # was called with (the alphabet length in `__init__`)
    return self._size
def __getitem__(self, object item):
    # Indexing supports four forms:
    #   matrix[i]        -> row `i` as a list of scores
    #   matrix["A"]      -> row of letter "A"
    #   matrix[i, j]     -> single score
    #   matrix["A", "R"] -> single score addressed by letters
    # Negative positions count from the end; any position outside of the
    # matrix raises an `IndexError`.
    cdef ssize_t index_
    cdef ssize_t column_
    cdef ssize_t size = <ssize_t> self._size
    cdef list row

    # translate a single letter into its position in the alphabet
    if isinstance(item, str) and len(item) == 1:
        try:
            item = self.alphabet.index(item)
        except ValueError:
            raise IndexError(f"{item!r} not in matrix alphabet ({self.alphabet!r})") from None

    if isinstance(item, int):
        index_ = item
        if index_ < 0:
            index_ += size
        if index_ < 0 or index_ >= size:
            raise IndexError(item)
        row = []
        for i in range(size):
            row.append(self._matrix[index_][i])
        return row
    elif isinstance(item, tuple):
        if len(item) > 2:
            raise IndexError(f"too many indices for array: array is 2-dimensional, but {len(item)!r} were indexed")
        i, j = item
        if isinstance(i, str) and len(i) == 1:
            try:
                i = self.alphabet.index(i)
            except ValueError:
                raise IndexError(f"{i!r} not in matrix alphabet ({self.alphabet!r})") from None
        if isinstance(j, str) and len(j) == 1:
            try:
                j = self.alphabet.index(j)
            except ValueError:
                raise IndexError(f"{j!r} not in matrix alphabet ({self.alphabet!r})") from None
        if isinstance(i, int) and isinstance(j, int):
            # `self._matrix` is a raw array of pointers: check both
            # coordinates before dereferencing it, as done for rows,
            # instead of reading outside of the allocated scores
            index_ = i
            column_ = j
            if index_ < 0:
                index_ += size
            if column_ < 0:
                column_ += size
            if index_ < 0 or index_ >= size or column_ < 0 or column_ >= size:
                raise IndexError(item)
            return self._matrix[index_][column_]
    raise TypeError(item)
def __eq__(self, object other):
    # Two matrices are equal when they share the same alphabet and the
    # same scores; the name is not taken into account.
    cdef size_t position
    cdef ScoringMatrix rhs

    assert self._data != NULL
    assert self._matrix != NULL

    # let Python try the reflected operation for foreign types
    if not isinstance(other, ScoringMatrix):
        return NotImplemented
    rhs = other

    if rhs.alphabet != self.alphabet:
        return False
    # same alphabet: compare the flat score arrays item by item
    position = 0
    while position < self._nitems:
        if self._data[position] != rhs._data[position]:
            return False
        position += 1
    return True
# --- Private methods ------------------------------------------------------
cdef int _allocate(self, size_t size) except 1 nogil:
    # (Re)allocate the storage for a `size` x `size` matrix: the flat
    # score array, the array of row pointers into it, and the
    # NUL-terminated C copy of the alphabet. Raises `MemoryError` when
    # one of the allocations fails.
    cdef size_t i
    # never request zero bytes: a zero-sized `realloc` is allowed to
    # return NULL, which would be reported as an allocation failure for
    # an empty matrix
    cdef size_t slots = size if size > 0 else 1
    cdef float* data
    cdef float** matrix
    cdef char* alphabet

    # the dimensions are only restored once every buffer is in place, so
    # that a failure never leaves row pointers that look valid
    self._size = self._shape[0] = self._shape[1] = 0
    self._nitems = 0

    # only overwrite a pointer when `realloc` succeeded: on failure the
    # previous block is still valid and must remain reachable
    data = <float*> realloc(self._data, sizeof(float) * slots * slots)
    if data is not NULL:
        self._data = data
    matrix = <float**> realloc(self._matrix, sizeof(float*) * slots)
    if matrix is not NULL:
        self._matrix = matrix
    alphabet = <char*> realloc(self._alphabet, sizeof(char) * (size + 1))
    if alphabet is not NULL:
        self._alphabet = alphabet
    if data is NULL or matrix is NULL or alphabet is NULL:
        raise MemoryError("Failed to allocate matrix")

    self._size = self._shape[0] = self._shape[1] = size
    self._nitems = self._size * self._size
    # make each row pointer reference its slice of the flat array
    for i in range(size):
        self._matrix[i] = &self._data[i * self._size]
    memset(self._alphabet, 0, sizeof(char) * (size + 1))
    return 0
# --- Public methods -------------------------------------------------------
cdef size_t size(self) noexcept nogil:
    """Get the size of the scoring matrix.

    This is the value of ``self._size``, i.e. the number of rows (and of
    columns) set by the last allocation.
    """
    return self._size
cdef const char* alphabet_ptr(self) except NULL nogil:
    """Get a read-only C-string view of the matrix alphabet.

    Raises a `RuntimeError` when the alphabet buffer is not allocated.
    """
    if self._alphabet is not NULL:
        return <const char*> self._alphabet
    with gil:
        raise RuntimeError("uninitialized scoring matrix")
cdef const float* data_ptr(self) except NULL nogil:
    """Get a read-only pointer to the scores as a dense array.

    Raises a `RuntimeError` when the score buffer is not allocated.
    """
    if self._data is not NULL:
        return <const float*> self._data
    with gil:
        raise RuntimeError("uninitialized scoring matrix")
cdef const float** matrix_ptr(self) except NULL nogil:
    """Get a read-only pointer to the scores as an array of row pointers.

    Raises a `RuntimeError` when the row pointers are not allocated.
    """
    if self._matrix is not NULL:
        return <const float**> self._matrix
    with gil:
        raise RuntimeError("uninitialized scoring matrix")
cpdef ScoringMatrix copy(self):
    """Get a copy of the matrix.

    The copy is created by calling the constructor of ``type(self)``
    with this matrix, its alphabet and its name.
    """
    return type(self)(self, alphabet=self.alphabet, name=self.name)
cpdef bint is_integer(self):
    """Check whether every score of the matrix is a whole number.

    Returns:
        `bool`: `True` if the matrix only contains integer scores.

    Example:
        >>> blosum62 = ScoringMatrix.from_name("BLOSUM62")
        >>> blosum62.is_integer()
        True
        >>> benner6 = ScoringMatrix.from_name("BENNER6")
        >>> benner6.is_integer()
        False

    """
    cdef size_t position
    cdef float score
    cdef const float* scores = self.data_ptr()
    cdef bint all_integers = True

    with nogil:
        for position in range(self._nitems):
            score = scores[position]
            # a score is integral when rounding leaves it unchanged
            if lrintf(score) != score:
                all_integers = False
                break
    return all_integers
cpdef bint is_symmetric(self):
    """Test whether the scoring matrix is symmetric.

    Returns:
        `bool`: `True` if the matrix is a symmetric matrix.

    .. versionadded:: 0.2.0

    """
    cdef size_t i
    cdef size_t j
    cdef bint symmetric = True
    cdef const float** _matrix = self.matrix_ptr()

    with nogil:
        for i in range(self._size):
            # only the upper triangle needs to be compared to its mirror
            for j in range(i + 1, self._size):
                if _matrix[i][j] != _matrix[j][i]:
                    symmetric = False
                    break
            # the inner `break` only leaves the column loop: stop scanning
            # the remaining rows as soon as one asymmetry was found
            if not symmetric:
                break
    return symmetric
cpdef float min(self):
    """Get the smallest score stored in the scoring matrix.

    Example:
        >>> blosum62 = ScoringMatrix.from_name("BLOSUM62")
        >>> blosum62.min()
        -4.0

    """
    cdef size_t position
    cdef float score
    cdef float lowest = INFINITY

    assert self._data != NULL
    with nogil:
        for position in range(self._nitems):
            score = self._data[position]
            if score < lowest:
                lowest = score
    return lowest
cpdef float max(self):
    """Get the largest score stored in the scoring matrix.

    Example:
        >>> blosum62 = ScoringMatrix.from_name("BLOSUM62")
        >>> blosum62.max()
        11.0

    """
    cdef size_t position
    cdef float score
    cdef float highest = -INFINITY

    assert self._data != NULL
    with nogil:
        for position in range(self._nitems):
            score = self._data[position]
            if score > highest:
                highest = score
    return highest
cpdef ScoringMatrix shuffle(self, str alphabet):
    """Reorder the rows and columns of the matrix for a new alphabet.

    The matrix name is retained only when the provided ``alphabet`` is a
    permutation of the current alphabet, e.g. there is no loss of data.

    Arguments:
        alphabet (`str`): The new alphabet to use for the columns. It
            must be a subset of ``self.alphabet``.

    Raises:
        `KeyError`: When some required alphabet letters are missing from
            the source matrix alphabet.

    Example:
        >>> m1 = ScoringMatrix.from_name("BLOSUM62")
        >>> m1[1, 1]
        5.0
        >>> m1['R', 'R']
        5.0
        >>> m2 = m1.shuffle("ABCDEFGHIKLMNPQRSTVWXYZ*")
        >>> m2[1, 1]
        4.0
        >>> m2['R', 'R']
        5.0

    """
    cdef list positions = []
    cdef list rows = []

    # locate every letter of the new alphabet in the current one
    for letter in alphabet:
        try:
            position = self.alphabet.index(letter)
        except ValueError:
            raise KeyError(f"new alphabet contains unknown letter: {letter!r}") from None
        positions.append(position)

    # pick the rows, then the columns, in the order of the new alphabet
    for position in positions:
        source_row = self[position]
        rows.append([source_row[column] for column in positions])

    if len(alphabet) == len(self.alphabet):
        name = self.name
    else:
        name = None
    return type(self)(rows, alphabet=alphabet, name=name)

Sorry, the diff of this file is too big to display

# Tables declared in the `matrices.h` header; the library indexes all four
# with the same matrix index.
cdef extern from "matrices.h" nogil:
    # names of the embedded matrices
    const char* _NAMES[]
    # alphabet of each matrix, as a C-string
    const char* _ALPHABETS[]
    # number of letters in each alphabet
    const size_t _SIZES[]
    # scores of each matrix, as a flat array read row by row
    const float* _MATRICES[]
# noqa: D104
from . import (
test_doctest,
test_scoring_matrix,
)
def load_tests(loader, suite, pattern):
    """Hook used by `unittest` to collect the tests of this package."""
    # the doctest module registers its own tests on the suite
    test_doctest.load_tests(loader, suite, pattern)
    # regular unit tests are discovered from the test module
    unit_tests = loader.loadTestsFromModule(test_scoring_matrix)
    suite.addTests(unit_tests)
    return suite
# coding: utf-8
"""Test doctest contained tests in every file of the module.
"""
import configparser
import doctest
import importlib
import os
import pkgutil
import re
import shutil
import sys
import types
import warnings
from unittest import mock
import scoring_matrices
def _load_tests_from_module(tests, module, globs, setUp=None, tearDown=None):
"""Load tests from module, iterating through submodules."""
for attr in (getattr(module, x) for x in dir(module) if not x.startswith("_")):
if isinstance(attr, types.ModuleType):
suite = doctest.DocTestSuite(
attr,
globs,
setUp=setUp,
tearDown=tearDown,
optionflags=+doctest.ELLIPSIS,
)
tests.addTests(suite)
return tests
def load_tests(loader, tests, ignore):
    """`load_test` function used by unittest to find the doctests."""
    _current_cwd = os.getcwd()

    def setUp(self):
        # silence warnings while a doctest is running
        warnings.simplefilter("ignore")

    def tearDown(self):
        # restore the default warning behaviour once the doctest is done
        warnings.simplefilter(warnings.defaultaction)

    # demonstrate how to use Biopython substitution matrices without
    # actually requiring Biopython: `Bio.Align.substitution_matrices.load`
    # returns a fake 20x20 matrix scoring 1 on the diagonal and -1 elsewhere
    scores = [[1 if row == column else -1 for column in range(20)] for row in range(20)]
    feng = mock.Mock()
    feng.alphabet = "ARNDCQEGHILKMFPSTWYV"
    feng.__len__ = mock.Mock(return_value=20)
    feng.__iter__ = mock.Mock(wraps=scores.__iter__)
    Bio = mock.Mock()
    Bio.Align = mock.Mock()
    Bio.Align.substitution_matrices = mock.Mock()
    Bio.Align.substitution_matrices.load = mock.Mock()
    Bio.Align.substitution_matrices.load.return_value = feng

    # doctests are not compatible with `green`, so we may want to bail out
    # early if `green` is running the tests
    if sys.argv[0].endswith("green"):
        return tests

    # load the doctests of every library module
    for module in [scoring_matrices.lib]:
        namespace = dict(scoring_matrices=scoring_matrices, Bio=Bio, **module.__dict__)
        suite = doctest.DocTestSuite(
            module,
            globs=namespace,
            setUp=setUp,
            tearDown=tearDown,
            optionflags=+doctest.ELLIPSIS,
        )
        tests.addTests(suite)
    return tests
import pickle
import unittest
import sys
from scoring_matrices import ScoringMatrix
class TestScoringMatrix(unittest.TestCase):
    """Unit tests for the `ScoringMatrix` constructors, accessors and helpers."""

    # --- built-in matrices ------------------------------------------------

    def test_from_name_blosum50(self):
        blosum50 = ScoringMatrix.from_name("BLOSUM50")
        scores = []
        for position in range(len(blosum50)):
            scores.append(blosum50[position, position])
        self.assertEqual(len(scores), 24)
        self.assertEqual(scores, [5, 7, 7, 8, 13, 7, 6, 8, 10, 5, 5, 6, 7, 8, 10, 5, 5, 15, 8, 5, 5, 5 ,-1 ,1])

    def test_from_name_blosum62(self):
        blosum62 = ScoringMatrix.from_name("BLOSUM62")
        scores = []
        for position in range(len(blosum62)):
            scores.append(blosum62[position, position])
        self.assertEqual(len(scores), 24)
        self.assertEqual(scores, [4, 5, 6, 6, 9, 5, 5, 6, 8, 4, 4, 5, 5, 6, 7, 4, 5, 11, 7, 4, 4, 4, -1, 1])

    def test_from_name_invalid_name(self):
        self.assertRaises(ValueError, ScoringMatrix.from_name, "nonsensical")

    # --- text parsing -----------------------------------------------------

    def test_from_str(self):
        # rows prefixed with their letter
        labelled = """
A T G C
A 5 -4 -4 -4
T -4 5 -4 -4
G -4 -4 5 -4
C -4 -4 -4 5
""".strip()
        with_labels = ScoringMatrix.from_str(labelled)
        self.assertEqual(with_labels.alphabet, "ATGC")
        self.assertEqual(with_labels['T', 'A'], -4.0)
        self.assertEqual(with_labels['A', 'A'], 5.0)

        # rows holding scores only
        unlabelled = """
A T G C
5 -4 -4 -4
-4 5 -4 -4
-4 -4 5 -4
-4 -4 -4 5
""".strip()
        without_labels = ScoringMatrix.from_str(unlabelled)
        self.assertEqual(without_labels.alphabet, "ATGC")
        self.assertEqual(without_labels['T', 'A'], -4.0)
        self.assertEqual(without_labels['A', 'A'], 5.0)

    # --- diagonal constructor ---------------------------------------------

    def test_from_diagonal(self):
        zeros = ScoringMatrix.from_diagonal([1, 2, 3, 4], 0.0, alphabet="ATGC")
        self.assertEqual(zeros[0], [1.0, 0.0, 0.0, 0.0])
        self.assertEqual(zeros[1], [0.0, 2.0, 0.0, 0.0])
        self.assertEqual(zeros[2], [0.0, 0.0, 3.0, 0.0])
        self.assertEqual(zeros[3], [0.0, 0.0, 0.0, 4.0])

        penalties = ScoringMatrix.from_diagonal([1, 2, 3, 4], -1.0, alphabet="ATGC")
        self.assertEqual(penalties[0], [ 1.0, -1.0, -1.0, -1.0])
        self.assertEqual(penalties[1], [-1.0, 2.0, -1.0, -1.0])
        self.assertEqual(penalties[2], [-1.0, -1.0, 3.0, -1.0])
        self.assertEqual(penalties[3], [-1.0, -1.0, -1.0, 4.0])

    def test_from_diagonal_invalid_length(self):
        # diagonal longer than the alphabet
        with self.assertRaises(ValueError):
            ScoringMatrix.from_diagonal([ 3, 3, 3, 3, 3, 3 ], alphabet="ATGC")
        # diagonal shorter than the alphabet
        with self.assertRaises(ValueError):
            ScoringMatrix.from_diagonal([ 3, 3, 3 ], alphabet="ATGC")

    # --- sequence and buffer protocols ------------------------------------

    def test_list(self):
        blosum50 = ScoringMatrix.from_name("BLOSUM50")
        rows = list(blosum50)
        letters = blosum50.alphabet
        self.assertEqual(len(letters), 24)
        self.assertEqual(len(rows), 24)
        for scores in rows:
            self.assertEqual(len(scores), 24)

    @unittest.skipUnless(sys.implementation.name == "cpython", "memoryview not supported")
    @unittest.skipUnless(sys.version_info >= (3, 9), "memoryview not supported")
    def test_memoryview(self):
        blosum50 = ScoringMatrix.from_name("BLOSUM50")
        view = memoryview(blosum50)
        self.assertEqual(view.shape, (24, 24))
        self.assertEqual(view[0, 0], 5.0)  # A <-> A
        self.assertEqual(view[6, 6], 6.0)  # E <-> E

    # --- default constructor ----------------------------------------------

    def test_init_empty(self):
        empty = ScoringMatrix([], alphabet="")
        self.assertEqual(len(empty), 0)
        self.assertFalse(bool(empty))

    def test_init_invalid_length(self):
        # one row is missing
        missing_row = [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
        ]
        with self.assertRaises(ValueError):
            ScoringMatrix(missing_row, alphabet="ATGC")
        # the last row lacks one column
        short_row = [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0],
        ]
        with self.assertRaises(ValueError):
            ScoringMatrix(short_row, alphabet="ATGC")

    # --- comparison and serialization -------------------------------------

    def test_eq(self):
        first = ScoringMatrix.from_name("BLOSUM50")
        same = ScoringMatrix.from_name("BLOSUM50")
        different = ScoringMatrix.from_name("BLOSUM62")
        self.assertEqual(first, first)
        self.assertEqual(first, same)
        self.assertNotEqual(first, different)
        self.assertNotEqual(first, 12)

    def test_pickle(self):
        original = ScoringMatrix.from_name("BLOSUM62")
        payload = pickle.dumps(original)
        restored = pickle.loads(payload)
        self.assertEqual(original.alphabet, restored.alphabet)
        self.assertEqual(list(original), list(restored))

    # --- shuffling --------------------------------------------------------

    def test_shuffle_invalid_alphabet(self):
        blosum62 = ScoringMatrix.from_name("BLOSUM62")
        with self.assertRaises(KeyError):
            blosum62.shuffle("ARNJOU")

    def test_shuffle_empty(self):
        blosum62 = ScoringMatrix.from_name("BLOSUM62")
        shuffled = blosum62.shuffle("")
        self.assertEqual(len(shuffled), 0)
        self.assertFalse(bool(shuffled))

    # --- properties -------------------------------------------------------

    def test_is_symmetric(self):
        pam250 = ScoringMatrix.from_name("PAM250")
        self.assertTrue(pam250.is_symmetric())
        asymmetric = ScoringMatrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]], "ABC")
        self.assertFalse(asymmetric.is_symmetric())
[metadata]
name = scoring-matrices
version = attr: scoring_matrices.__version__
author = Martin Larralde
author_email = martin.larralde@embl.de
url = https://github.com/althonos/scoring-matrices
description = Dependency free, Cython-compatible scoring matrices to use with biological sequences.
long_description = file: README.md
long_description_content_type = text/markdown
license = MIT
platform = any
keywords = bioinformatics, sequence, substitution, matrix, score
classifier =
Development Status :: 3 - Alpha
Intended Audience :: Developers
Intended Audience :: Science/Research
License :: OSI Approved :: MIT License
Operating System :: OS Independent
Programming Language :: C
Programming Language :: Cython
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy
Topic :: Scientific/Engineering :: Bio-Informatics
Typing :: Typed
project_urls =
Bug Tracker = https://github.com/althonos/scoring-matrices/issues
Changelog = https://github.com/althonos/scoring-matrices/blob/main/CHANGELOG.md
Coverage = https://codecov.io/gh/althonos/scoring-matrices/
Builds = https://github.com/althonos/scoring-matrices/actions
[options]
zip_safe = false
packages = scoring_matrices, scoring_matrices.tests
include_package_data = true
python_requires = >=3.6
setup_requires =
setuptools >=46.4
cython ~=3.0
[options.package_data]
scoring_matrices = py.typed, *.pyi, *.pxd, *.h
[coverage:run]
plugins = Cython.Coverage
include = scoring_matrices/*.py, scoring_matrices/lib.pyx
[coverage:report]
include = scoring_matrices/*
omit = scoring_matrices/tests/*
show_missing = true
exclude_lines =
pragma: no cover
if typing.TYPE_CHECKING:
@abc.abstractmethod
@abc.abstractproperty
raise NotImplementedError
return NotImplemented
raise MemoryError()
[pydocstyle]
inherit = false
ignore = D102,D105,D200,D203,D213,D406,D407
match-dir = (?!tests)(?!docs)[^\.].*
match = (?!test)(?!setup)[^\._].*\.py
[mypy]
disallow_any_decorated = true
disallow_any_generics = true
disallow_any_unimported = false
disallow_subclassing_any = false
disallow_untyped_calls = true
disallow_untyped_defs = true
ignore_missing_imports = true
warn_unused_ignores = true
warn_return_any = true
exclude = scoring_matrices/tests/*
[mypy-scoring_matrices.tests]
disallow_untyped_defs = false
[isort]
known_first_party = scoring_matrices
known_standard_library = cpython, cython, _unicode
known_third_party = libc, libcpp
line_length = 88
profile = black
skip_gitignore = true
[egg_info]
tag_build =
tag_date = 0
import configparser
import functools
import ftplib
import glob
import itertools
import io
import json
import multiprocessing.pool
import os
import platform
import re
import setuptools
import setuptools.extension
import subprocess
import string
import sys
import sysconfig
from distutils.command.clean import clean as _clean
from distutils.errors import CompileError
from setuptools.command.build_ext import build_ext as _build_ext
from setuptools.command.sdist import sdist as _sdist
from setuptools.extension import Extension
try:
from Cython.Build import cythonize
except ImportError as err:
cythonize = err
# --- Utils ------------------------------------------------------------------
def _eprint(*args, **kwargs):
print(*args, **kwargs, file=sys.stderr)
def _split_multiline(value):
value = value.strip()
sep = max('\n,;', key=value.count)
return list(filter(None, map(lambda x: x.strip(), value.split(sep))))
# --- Commands ------------------------------------------------------------------
class sdist(_sdist):
    """A `sdist` that generates a `pyproject.toml` on the fly."""

    def run(self):
        # make sure the matrix header exists before packaging the sources
        already_generated = self.distribution.have_run.get("build_matrices", False)
        if not already_generated:
            matrices_cmd = self.get_finalized_command("build_matrices")
            matrices_cmd.force = self.force
            matrices_cmd.run()

        # derive the `[build-system]` table of `pyproject.toml` from the
        # `setup_requires` declared in `setup.cfg`
        config = configparser.ConfigParser()
        config.add_section("build-system")
        build_system = config["build-system"]
        build_system["requires"] = str(self.distribution.setup_requires)
        build_system["build-backend"] = '"setuptools.build_meta"'
        with open("pyproject.toml", "w") as dst:
            config.write(dst)

        # run the rest of the packaging
        _sdist.run(self)
class build_matrices(setuptools.Command):
    """Generate the C header embedding the scoring matrices of ``data``."""

    user_options = [
        ('force', 'f', 'force generation of files'),
        ('output=', 'o', 'output file to write'),
        ('matrices=', 'm', 'matrices to generate'),
    ]

    def initialize_options(self) -> None:
        self.force = False
        self.output = None
        self.matrices = None

    def finalize_options(self) -> None:
        self.folder = "data"
        # only fall back to the default header location when `--output` was
        # not given: the option used to be overwritten unconditionally
        if self.output is None:
            self.output = os.path.join("scoring_matrices", "matrices.h")
        if self.matrices is not None:
            self.matrices = _split_multiline(self.matrices)
        else:
            # default to every `.mat` file found in the data folder
            self.matrices = [
                os.path.splitext(os.path.basename(mat))[0]
                for mat in glob.glob(os.path.join(self.folder, "*.mat"))
            ]

    def run(self):
        matrix_files = [ os.path.join(self.folder, f"{matrix}.mat") for matrix in self.matrices ]
        # `make_file` skips the generation when the header is up-to-date
        self.make_file(matrix_files, self.output, self._generate_matrices, (matrix_files, self.output))

    def _parse_matrix_file(self, matrix_file):
        """Parse a matrix file into its alphabet and its rows of scores."""
        with open(matrix_file) as f:
            # skip blank lines and comments
            lines = filter(
                lambda line: line and not line.startswith("#"),
                map(str.strip, f),
            )
            # the header lists the letters; the first column of every
            # following line is the row label and is dropped
            letters = ''.join(next(lines).split())
            matrix = [
                list(map(float, line.strip().split()[1:]))
                for line in map(str.strip, lines)
                if line
            ]
        return letters, matrix

    def _generate_matrices(self, matrix_files, output_file):
        """Write the C arrays describing ``matrix_files`` to ``output_file``."""
        matrices = {}
        for matrix_file in matrix_files:
            matrix_name = os.path.splitext(os.path.basename(matrix_file))[0].upper()
            matrices[matrix_name] = self._parse_matrix_file(matrix_file)

        with open(output_file, "w") as dst:
            dst.write("#include <stddef.h>\n")
            names = sorted(matrices.keys())
            # C identifiers cannot contain dots
            ids = [ name.replace(".", "_") for name in names ]

            # every table has one entry per matrix, plus a final sentinel
            dst.write(f"const char* _NAMES[{len(names) + 1}] = {{")
            for name in names:
                dst.write(f'"{name}", ')
            dst.write("NULL };\n")

            dst.write(f"const char* _ALPHABETS[{len(names) + 1}] = {{")
            for name in names:
                alphabet, _ = matrices[name]
                dst.write(f'"{alphabet}", ')
            dst.write("NULL };\n")

            dst.write(f"const size_t _SIZES[{len(names) + 1}] = {{")
            for name in names:
                alphabet, _ = matrices[name]
                dst.write(f'{len(alphabet)}, ')
            dst.write("-1 };\n")

            # one flat array of scores per matrix
            for name, id_ in zip(names, ids):
                alphabet, matrix = matrices[name]
                nitems = len(matrix) * len(matrix)
                dst.write(f"float _MATRIX_{id_}[{nitems}] = {{")
                # dedicated loop variable: the item counter used to shadow
                # the index of the enclosing loop
                for position, item in enumerate(itertools.chain.from_iterable(matrix)):
                    if position != 0:
                        dst.write(", ")
                    dst.write(f"{item!r}F")
                dst.write("};\n")

            dst.write(f"const float* _MATRICES[{len(names) + 1}] = {{")
            for id_ in ids:
                dst.write(f'_MATRIX_{id_}, ')
            dst.write("NULL };\n")
class build_ext(_build_ext):
    """A `build_ext` that generates the matrices and cythonizes the sources."""

    # --- Compatibility with `setuptools.Command`

    def initialize_options(self):
        _build_ext.initialize_options(self)

    def finalize_options(self):
        _build_ext.finalize_options(self)
        # check platform
        if self.plat_name is None:
            self.plat_name = sysconfig.get_platform()

    # --- Autotools-like helpers ---

    def _check_getid(self):
        """Check whether a call to `PyInterpreterState_GetID` compiles."""
        _eprint('checking whether `PyInterpreterState_GetID` is available')

        base = "have_getid"
        testfile = os.path.join(self.build_temp, "{}.c".format(base))
        objects = []

        self.mkpath(self.build_temp)
        # the doubled braces are written as-is: they only nest the body of
        # `main` in an extra block
        with open(testfile, "w") as f:
            f.write("""
#include <stdint.h>
#include <stdlib.h>
#include <Python.h>
int main(int argc, char *argv[]) {{
PyInterpreterState_GetID(NULL);
return 0;
}}
""")

        # turn an undeclared function into a hard error
        if self.compiler.compiler_type == "msvc":
            flags = ["/WX"]
        else:
            flags = ["-Werror=implicit-function-declaration"]

        try:
            objects = self.compiler.compile([testfile], extra_postargs=flags)
        except CompileError:
            _eprint("no")
            return False
        else:
            _eprint("yes")
            return True
        finally:
            # always remove the probe and whatever object it produced
            os.remove(testfile)
            for obj in filter(os.path.isfile, objects):
                os.remove(obj)

    # --- Build code ---

    def build_extension(self, ext):
        # show the compiler being used
        _eprint("building", ext.name, "with", self.compiler.compiler_type, "compiler for platform", self.plat_name)

        # add debug symbols if we are building in debug mode
        if self.debug:
            if self.compiler.compiler_type in {"unix", "cygwin", "mingw32"}:
                ext.extra_compile_args.append("-g")
            elif self.compiler.compiler_type == "msvc":
                ext.extra_compile_args.append("/Z7")
            if sys.implementation.name == "cpython":
                ext.define_macros.append(("CYTHON_TRACE_NOGIL", 1))
        else:
            ext.define_macros.append(("CYTHON_WITHOUT_ASSERTIONS", 1))

        # add Windows flags
        if self.compiler.compiler_type == "msvc":
            ext.define_macros.append(("WIN32", 1))

        # build the rest of the extension as normal
        ext._needs_stub = False

        # compile each extension in its own temporary folder, and restore
        # the shared one even when the compilation fails
        _build_temp = self.build_temp
        self.build_temp = os.path.join(_build_temp, ext.name)
        try:
            _build_ext.build_extension(self, ext)
        finally:
            self.build_temp = _build_temp

    def build_extensions(self):
        # check `cythonize` is available
        if isinstance(cythonize, ImportError):
            raise RuntimeError(
                "Cython is required to run `build_ext` command"
            ) from cythonize

        # generate score matrices
        if not self.distribution.have_run.get("build_matrices", False):
            _build_cmd = self.get_finalized_command("build_matrices")
            _build_cmd.force = self.force
            _build_cmd.run()

        # use debug directives with Cython if building in debug mode
        cython_args = {
            "include_path": ["include"],
            "compiler_directives": {
                "cdivision": True,
                "nonecheck": False,
            },
            "compile_time_env": {
                "SYS_IMPLEMENTATION_NAME": sys.implementation.name,
                "SYS_VERSION_INFO_MAJOR": sys.version_info.major,
                "SYS_VERSION_INFO_MINOR": sys.version_info.minor,
                "SYS_VERSION_INFO_MICRO": sys.version_info.micro,
                "DEFAULT_BUFFER_SIZE": io.DEFAULT_BUFFER_SIZE,
            },
        }
        if self.force:
            cython_args["force"] = True
        if self.debug:
            cython_args["annotate"] = True
            cython_args["compiler_directives"]["cdivision_warnings"] = True
            cython_args["compiler_directives"]["warn.undeclared"] = True
            cython_args["compiler_directives"]["warn.unreachable"] = True
            cython_args["compiler_directives"]["warn.maybe_uninitialized"] = True
            cython_args["compiler_directives"]["warn.unused"] = True
            cython_args["compiler_directives"]["warn.unused_arg"] = True
            cython_args["compiler_directives"]["warn.unused_result"] = True
            cython_args["compiler_directives"]["warn.multiple_declarators"] = True
        else:
            cython_args["compiler_directives"]["boundscheck"] = False
            cython_args["compiler_directives"]["wraparound"] = False

        # check if `PyInterpreterState_GetID` is defined
        if self._check_getid():
            self.compiler.define_macro("HAS_PYINTERPRETERSTATE_GETID", 1)

        # cythonize the extensions
        self.extensions = cythonize(self.extensions, **cython_args)

        # build the extensions as normal
        _build_ext.build_extensions(self)
class clean(_clean):
    """A `clean` that removes intermediate files created by Cython."""

    def run(self):
        # the generated files live in the package folder of this project
        # (the folder name used to point at another project)
        source_dir = os.path.join(os.path.dirname(__file__), "scoring_matrices")

        # annotation reports are always removed; compiled artifacts and
        # generated sources only with `--all`
        patterns = ["*.html"]
        if self.all:
            patterns.extend(["*.so", "*.c", "*.cpp"])

        for pattern in patterns:
            for file in glob.glob(os.path.join(source_dir, pattern)):
                _eprint("removing {!r}".format(file))
                os.remove(file)

        # remove the C/C++ file generated next to every Cython source;
        # `ext_modules` may be unset when no extension is declared
        for ext in self.distribution.ext_modules or []:
            # keep the suffix in its own variable: rebinding `ext` to a
            # string broke extensions with more than one `.pyx` source
            suffix = ".cpp" if ext.language == "c++" else ".c"
            for source_file in ext.sources:
                if source_file.endswith(".pyx"):
                    c_file = os.path.splitext(source_file)[0] + suffix
                    if os.path.exists(c_file):
                        _eprint("removing {!r}".format(c_file))
                        os.remove(c_file)

        _clean.run(self)
# --- Setup ---------------------------------------------------------------------
# Only the extension module and the custom commands are given here; the
# `[metadata]` and `[options]` sections live in `setup.cfg`.
setuptools.setup(
    ext_modules=[
        # single Cython extension; its include directory is the package folder
        Extension(
            "scoring_matrices.lib",
            language="c",
            include_dirs=["scoring_matrices"],
            sources=[os.path.join("scoring_matrices", "lib.pyx")],
        ),
    ],
    # replace the stock commands with the customized ones defined above
    cmdclass={
        "sdist": sdist,
        "build_ext": build_ext,
        "build_matrices": build_matrices,
        "clean": clean,
    },
)