abutils
Advanced tools
| #!/usr/bin/env python | ||
| # filename: bin.py | ||
| # | ||
| # | ||
| # Copyright (c) 2023 Bryan Briney | ||
| # License: The MIT license (http://opensource.org/licenses/MIT) | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a copy of this software | ||
| # and associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| # including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| # sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| # furnished to do so, subject to the following conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included in all copies or | ||
| # substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING | ||
| # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| # | ||
| import os | ||
| import platform | ||
| from typing import Optional | ||
| from urllib.request import urlretrieve | ||
| __all__ = ["get_path", "get_binary_directory"] | ||
| # __all__ = [ | ||
| # "cdhit", | ||
| # "fastp", | ||
| # "fasttree", | ||
| # "mmseqs", | ||
| # "mafft", | ||
| # "muscle", | ||
| # "muscle_v3", | ||
| # "vsearch", | ||
| # ] | ||
| BIN_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "bin") | ||
| SYSTEM = platform.system().lower() | ||
| MACHINE = platform.machine().lower().replace("x86_64", "amd64") | ||
| # cdhit = os.path.join(BIN_DIR, f"cd-hit_{SYSTEM}_{MACHINE}") | ||
| # fastp = os.path.join(BIN_DIR, f"fastp_{SYSTEM}") | ||
| # fasttree = os.path.join(BIN_DIR, f"fasttree_{SYSTEM}_{MACHINE}") | ||
| # mafft = os.path.join(BIN_DIR, f"mafft_{SYSTEM}_{MACHINE}/mafft.bat") | ||
| # mmseqs = os.path.join(BIN_DIR, f"mmseqs_{SYSTEM}_{MACHINE}") | ||
| # muscle = os.path.join(BIN_DIR, f"muscle_{SYSTEM}_{MACHINE}") | ||
| # muscle_v3 = os.path.join(BIN_DIR, f"muscle3_{SYSTEM}_{MACHINE}") | ||
| # vsearch = os.path.join(BIN_DIR, f"vsearch_{SYSTEM}_{MACHINE}") | ||
| def get_binary_directory() -> str: | ||
| """ | ||
| Gets the abutils binary directory. | ||
| Returns | ||
| ------- | ||
| str | ||
| Path to the abutils binary directory. | ||
| """ | ||
| return BIN_DIR | ||
| def copy_to_binary_directory(source: str, name: Optional[str] = None) -> None: | ||
| """ | ||
| Copies a binary to the abutils binary directory. | ||
| Parameters | ||
| ---------- | ||
| source : str | ||
| Path to the binary to be copied. | ||
| name : str, optional | ||
| Name of the binary in the binary directory. If not provided, the basename of the | ||
| source binary will be used. | ||
| """ | ||
| binary_dir = get_binary_directory() | ||
| name = name if name is not None else os.path.basename(source) | ||
| target = os.path.join(binary_dir, name) | ||
| if not os.path.exists(target): | ||
| os.system(f'cp "{source}" "{target}"') | ||
| def get_path(binary: str) -> str: | ||
| """ | ||
| Gets the path to a pre-packaged binary. | ||
| .. note:: | ||
| If the binary is not found in the binary directory, it will be downloaded from | ||
| the brineylab S3 bucket. This is because some binaries are too large | ||
| to be included in the ``abutils`` PyPI package. | ||
| Parameters | ||
| ---------- | ||
| binary : str | ||
| Name of the binary. Available binaries are: | ||
| - blastn | ||
| - cd-hit | ||
| - fastp | ||
| - fasttree | ||
| - makeblastdb | ||
| - mafft | ||
| - mmseqs | ||
| - muscle | ||
| - muscle_v3 | ||
| - vsearch | ||
| Returns | ||
| ------- | ||
| str | ||
| Path to the binary. | ||
| """ | ||
| # format (and maybe fix) binary name | ||
| binary = binary.lower() | ||
| if binary == "mmseqs2": | ||
| binary = "mmseqs" | ||
| if binary == "muscle_v3": | ||
| binary = "muscle3" | ||
| if binary == "cd-hit": | ||
| binary = "cdhit" | ||
| available = [ | ||
| # blast | ||
| "blastn", | ||
| "makeblastdb", | ||
| # clustering/alignment | ||
| "cdhit", | ||
| "mafft", | ||
| "mmseqs", | ||
| "muscle", | ||
| "muscle3", | ||
| "vsearch", | ||
| # qc | ||
| "fastp", | ||
| # phylogeny | ||
| "fasttree", | ||
| ] | ||
| if binary not in available: | ||
| raise ValueError( | ||
| f"Binary '{binary}' not available. Available binaries: \n" | ||
| + "\n".join(available) | ||
| ) | ||
| # get binary path | ||
| if binary in ["fastp", "muscle3", "blastn", "makeblastdb"]: | ||
| bin_name = f"{binary}_{SYSTEM}" | ||
| bin_path = os.path.join(BIN_DIR, bin_name) | ||
| elif binary == "mafft": | ||
| bin_name = f"{binary}_{SYSTEM}_{MACHINE}/mafft.bat" | ||
| bin_path = os.path.join(BIN_DIR, bin_name) | ||
| else: | ||
| bin_name = f"{binary}_{SYSTEM}_{MACHINE}" | ||
| bin_path = os.path.join(BIN_DIR, bin_name) | ||
| # download binary if missing | ||
| if not os.path.exists(bin_path): | ||
| print( | ||
| f"Downloading missing binary: {binary}. This will only occur once on intial use of the binary." | ||
| ) | ||
| download_missing_binary(bin_name, bin_path) | ||
| return bin_path | ||
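| # Illustrative usage (not part of the original module): resolving the path to a | ||
| # bundled binary. Any name from the "available" list above works; "vsearch" is | ||
| # just an example, and the returned path depends on the host system/architecture. | ||
| # | ||
| #     from abutils.bin import get_path | ||
| #     vsearch_bin = get_path("vsearch")  # downloaded from S3 on first use if missing | ||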
| # download missing binary (some are too large to be included in the PyPI package) | ||
| def download_missing_binary(bin_name: str, bin_path: str) -> None: | ||
| url = f"https://brineylab.s3.amazonaws.com/tools/abutils_binaries/{bin_name}" | ||
| urlretrieve(url, bin_path) | ||
| # # download any missing binaries (some are too large to included in the PyPI package) | ||
| # def download_missing_binaries(): | ||
| # to_download = [] | ||
| # for b in [ | ||
| # "cd-hit", | ||
| # "fastp", | ||
| # "fasttree", | ||
| # "mafft", | ||
| # "mmseqs", | ||
| # "muscle", | ||
| # "muscle3", | ||
| # "vsearch", | ||
| # ]: | ||
| # if b in ["fastp", "muscle3"]: | ||
| # bin_name = f"{b}_{SYSTEM}" | ||
| # bin_path = os.path.join(BIN_DIR, bin_name) | ||
| # else: | ||
| # bin_name = f"{b}_{SYSTEM}_{MACHINE}" | ||
| # bin_path = os.path.join(BIN_DIR, bin_name) | ||
| # if not os.path.exists(bin_path): | ||
| # to_download.append((bin_name, bin_path)) | ||
| # if to_download: | ||
| # print( | ||
| # "Downloading missing binaries. This will only occur once on intial use of abutils." | ||
| # ) | ||
| # url = f"https://brineylab.s3.amazonaws.com/tools/abutils_binaries/{bin_name}" | ||
| # urlretrieve(url, bin_path) | ||
| # download_missing_binaries() |
| import pytest | ||
| from abutils import Sequence | ||
| from abutils.tools.cloning import build_synthesis_constructs | ||
| # ------------------------------------ | ||
| # build_synthesis_constructs | ||
| # ------------------------------------ | ||
| def test_build_synthesis_constructs_single_sequence(): | ||
| seq = Sequence("RINEYLA") | ||
| seq["locus"] = "IGH" | ||
| constructs = build_synthesis_constructs(seq) | ||
| assert len(constructs) == 1 | ||
| opt_seq = str(constructs[0].sequence) | ||
| assert opt_seq.startswith("catcctttttctagtagcaactgcaaccggtgtacac".upper()) | ||
| assert opt_seq.endswith("gcgtcgaccaagggcccatcggtcttcc".upper()) | ||
| def test_build_synthesis_constructs_multiple_sequences(): | ||
| seqs = [Sequence("RINEYLA"), Sequence("ALYENIR")] | ||
| for seq in seqs: | ||
| seq["locus"] = "IGH" | ||
| constructs = build_synthesis_constructs(seqs) | ||
| assert len(constructs) == 2 | ||
| def test_build_synthesis_constructs_with_overhangs(): | ||
| seq = Sequence("RINEYLA") | ||
| seq["locus"] = "IGH" | ||
| overhang_5 = {"IGH": "gggg"} | ||
| overhang_3 = {"IGH": "cccc"} | ||
| constructs = build_synthesis_constructs( | ||
| seq, overhang_5=overhang_5, overhang_3=overhang_3 | ||
| ) | ||
| opt_seq = str(constructs[0].sequence) | ||
| assert opt_seq.startswith("gggg".upper()) | ||
| assert opt_seq.endswith("cccc".upper()) | ||
| def test_build_synthesis_constructs_no_overhangs(): | ||
| seq = Sequence("ATGCATGCTTATGAG") | ||
| constructs = build_synthesis_constructs(seq, overhang_5={}, overhang_3={}) | ||
| assert len(constructs[0].sequence) == len("ATGCATGCTTATGAG") | ||
| def test_build_synthesis_constructs_group_by_chain(): | ||
| seqs = [Sequence("RINEYLA"), Sequence("ALYENIR")] | ||
| for seq, locus in zip(seqs, ["IGH", "IGK"]): | ||
| seq["locus"] = locus | ||
| constructs = build_synthesis_constructs(seqs, group_by_chain=True) | ||
| assert constructs[0].id[-3:] == "IGH" | ||
| assert constructs[1].id[-3:] == "IGK" | ||
| def test_build_synthesis_constructs_sort_func(): | ||
| seqs = [Sequence("RINEYLA", id="B"), Sequence("ALYENIR", id="A")] | ||
| for s in seqs: | ||
| s["locus"] = "IGH" | ||
| constructs = build_synthesis_constructs( | ||
| seqs, sort_func=lambda s: s.id, add_locus_to_name=False | ||
| ) | ||
| assert constructs[0].id == "A" | ||
| assert constructs[1].id == "B" |
| #!/usr/bin/env python | ||
| # filename: cloning.py | ||
| # | ||
| # | ||
| # Copyright (c) 2023 Bryan Briney | ||
| # License: The MIT license (http://opensource.org/licenses/MIT) | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a copy of this software | ||
| # and associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| # including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| # sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| # furnished to do so, subject to the following conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included in all copies or | ||
| # substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING | ||
| # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| # | ||
| from typing import Callable, Iterable, Optional, Union | ||
| from tqdm.auto import tqdm | ||
| from ..core.sequence import Sequence, codon_optimize | ||
| __all__ = ["build_synthesis_constructs"] | ||
| def build_synthesis_constructs( | ||
| sequences: Union[Sequence, Iterable[Sequence], str], | ||
| overhang_5: Optional[dict] = None, | ||
| overhang_3: Optional[dict] = None, | ||
| sequence_key: Optional[str] = None, | ||
| id_key: Optional[str] = None, | ||
| locus_key: Optional[str] = None, | ||
| frame: Optional[int] = None, | ||
| add_locus_to_name: bool = True, | ||
| group_by_chain: bool = False, | ||
| show_progress: bool = True, | ||
| sort_func: Optional[Callable] = None, | ||
| ): | ||
| """ | ||
| Builds codon-optimized synthesis constructs, including Gibson overhangs suitable | ||
| for cloning IGH, IGK and IGL variable region constructs into antibody expression | ||
| vectors. | ||
| .. seealso:: | ||
| | Thomas Tiller, Eric Meffre, Sergey Yurasov, Makoto Tsuiji, Michel C Nussenzweig, Hedda Wardemann | ||
| | Efficient generation of monoclonal antibodies from single human B cells by single cell RT-PCR and expression vector cloning | ||
| | *Journal of Immunological Methods* 2008, doi: 10.1016/j.jim.2007.09.017 | ||
| Parameters | ||
| ---------- | ||
| sequences : ``Sequence``, str, or ``Iterable[Sequence]`` | ||
| A ``Sequence`` object, a sequence as a ``str``, or an ``Iterable`` of ``Sequence`` objects. | ||
| .. note:: | ||
| If the provided sequences are nucleotide sequences, ``frame`` must also be provided | ||
| so that codon optimization can be performed on the correct reading frame. | ||
| overhang_5 : dict, optional | ||
| A ``dict`` mapping the locus name to 5' Gibson overhangs. By default, Gibson | ||
| overhangs corresponding to the expression vectors in Tiller et al, 2008: | ||
| | **IGH:** ``catcctttttctagtagcaactgcaaccggtgtacac`` | ||
| | **IGK:** ``atcctttttctagtagcaactgcaaccggtgtacac`` | ||
| | **IGL:** ``atcctttttctagtagcaactgcaaccggtgtacac`` | ||
| To produce constructs without 5' Gibson overhangs, provide an empty dictionary. | ||
| overhang_3 : dict, optional | ||
| A ``dict`` mapping the locus name to 3' Gibson overhangs. By default, Gibson | ||
| overhangs corresponding to the expression vectors in Tiller et al, 2008: | ||
| | **IGH:** ``gcgtcgaccaagggcccatcggtcttcc`` | ||
| | **IGK:** ``cgtacggtggctgcaccatctgtcttcatc`` | ||
| | **IGL:** ``ggtcagcccaaggctgccccctcggtcactctgttcccgccctcgagtgaggagcttcaagccaacaaggcc`` | ||
| To produce constructs without 3' Gibson overhangs, provide an empty dictionary. | ||
| sequence_key : str, optional | ||
| Field containing the sequence to be codon optimized. If not provided, | ||
| ``sequence.sequence`` is used. Either nucleotide or amino acid sequences | ||
| are acceptable. | ||
| id_key : str, optional | ||
| Field containing the name of the sequence. If not provided, ``sequence.id`` is used. | ||
| locus_key : str, optional | ||
| Field containing the name of the locus. If not provided, ``sequence["locus"]`` is used, which | ||
| conforms with AIRR naming conventions. | ||
| .. note:: | ||
| The data in ``locus_key`` should return one of the following: | ||
| * IGH, IGK, or IGL (which are the names of the loci in AIRR naming conventions) | ||
| * heavy, kappa, or lambda | ||
| frame : int, default=1 | ||
| Reading frame to translate. Default is ``1``. | ||
| .. note:: | ||
| This parameter is ignored if the input sequences are amino acid sequences. | ||
| add_locus_to_name : bool, default=True | ||
| If ``True``, the name of the sequence will be appended with the name of the locus, separated by an underscore. | ||
| If ``False``, the name of the sequence will not be modified. | ||
| group_by_chain : bool, default=False | ||
| If ``True``, the output ``list`` of ``Sequence`` objects will be separated by chain (heavy first, light second). | ||
| Within each chain group, sequence ordering will be retained (either input order, or sorted by ``sort_func``). | ||
| If ``False``, the output ``list`` of ``Sequence`` objects will not be grouped. | ||
| sort_func : Callable, optional | ||
| A function to sort the output ``list`` of ``Sequence`` objects. By default, | ||
| the output ``list`` is in the same order as the input. | ||
| Returns | ||
| ------- | ||
| sequences : ``list`` of ``Sequence`` objects | ||
| A ``list`` of ``Sequence`` objects. Each ``Sequence`` object has the following properties: | ||
| | *id*: The sequence ID (either ``sequence.id`` or ``id_key``), to which the locus (``locus_key``) is appended if ``add_locus_to_name`` is ``True``. | ||
| | *sequence*: The codon-optimized sequence, including Gibson overhangs. | ||
| If ``sort_func`` is provided, the output ``list`` will be sorted by ``sort_func``, otherwise the output is in the same order as the input. | ||
| """ | ||
| # get overhangs | ||
| overhang_3 = overhang_3 if overhang_3 is not None else GIBSON3 | ||
| overhang_5 = overhang_5 if overhang_5 is not None else GIBSON5 | ||
| # process input | ||
| if isinstance(sequences, (Sequence, str)): | ||
| sequences = [sequences] | ||
| sequences = [Sequence(s) for s in sequences] | ||
| # sort | ||
| if sort_func is not None: | ||
| sequences = sorted(sequences, key=sort_func) | ||
| if group_by_chain: | ||
| locus_key = locus_key if locus_key is not None else "locus" | ||
| heavies = [s for s in sequences if s[locus_key] in ["heavy", "IGH"]] | ||
| lights = [s for s in sequences if s[locus_key] not in ["heavy", "IGH"]] | ||
| sequences = heavies + lights | ||
| # make codon-optimized synthesis constructs | ||
| if show_progress: | ||
| sequences = tqdm(sequences) | ||
| optimized = [] | ||
| for sequence in sequences: | ||
| seq = sequence[sequence_key] if sequence_key is not None else sequence.sequence | ||
| name = sequence[id_key] if id_key is not None else sequence.id | ||
| locus = sequence[locus_key] if locus_key is not None else sequence["locus"] | ||
| if add_locus_to_name and locus is not None: | ||
| name += f"_{locus}" | ||
| opt_seq = codon_optimize(seq, frame=frame, as_string=True) | ||
| synthesis_construct = ( | ||
| overhang_5.get(locus, "") + opt_seq + overhang_3.get(locus, "") | ||
| ) | ||
| optimized.append(Sequence(synthesis_construct, id=name)) | ||
| return optimized | ||
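| # Illustrative usage (not part of the original module): building a Tiller-style | ||
| # synthesis construct from an annotated heavy-chain sequence. The amino acid | ||
| # sequence and ID below are placeholders. | ||
| # | ||
| #     from abutils import Sequence | ||
| #     from abutils.tools.cloning import build_synthesis_constructs | ||
| # | ||
| #     heavy = Sequence("EVQLVESGGGLVQPGG", id="mab1_heavy") | ||
| #     heavy["locus"] = "IGH"  # AIRR-style locus annotation | ||
| #     constructs = build_synthesis_constructs([heavy]) | ||
| #     constructs[0].id  # -> "mab1_heavy_IGH" (locus appended by default) | ||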
| GIBSON5 = { | ||
| "IGH": "catcctttttctagtagcaactgcaaccggtgtacac", | ||
| "IGK": "atcctttttctagtagcaactgcaaccggtgtacac", | ||
| "IGL": "atcctttttctagtagcaactgcaaccggtgtacac", | ||
| "heavy": "catcctttttctagtagcaactgcaaccggtgtacac", | ||
| "kappa": "atcctttttctagtagcaactgcaaccggtgtacac", | ||
| "lambda": "atcctttttctagtagcaactgcaaccggtgtacac", | ||
| } | ||
| GIBSON3 = { | ||
| "IGH": "gcgtcgaccaagggcccatcggtcttcc", | ||
| "IGK": "cgtacggtggctgcaccatctgtcttcatc", | ||
| "IGL": "ggtcagcccaaggctgccccctcggtcactctgttcccgccctcgagtgaggagcttcaagccaacaaggcc", | ||
| "heavy": "gcgtcgaccaagggcccatcggtcttcc", | ||
| "kappa": "cgtacggtggctgcaccatctgtcttcatc", | ||
| "lambda": "ggtcagcccaaggctgccccctcggtcactctgttcccgccctcgagtgaggagcttcaagccaacaaggcc", | ||
| } | ||
| LOCUS_MAP = { | ||
| "IGH": "heavy", | ||
| "IGK": "kappa", | ||
| "IGL": "lambda", | ||
| "TRA": "alpha", | ||
| "TRB": "beta", | ||
| "TRD": "delta", | ||
| "TRG": "gamma", | ||
| } |
| #!/usr/bin/python | ||
| # filename: preprocessing.py | ||
| # | ||
| # Copyright (c) 2024 Bryan Briney | ||
| # License: The MIT license (http://opensource.org/licenses/MIT) | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a copy of this software | ||
| # and associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| # including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| # sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| # furnished to do so, subject to the following conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included in all copies or | ||
| # substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING | ||
| # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| # | ||
| import os | ||
| import subprocess as sp | ||
| from typing import Iterable, Optional, Union | ||
| from natsort import natsorted | ||
| from tqdm.auto import tqdm | ||
| from ..bin import get_path as get_binary_path | ||
| from ..io import concatenate_files, delete_files, list_files, make_dir, rename_file | ||
| class FASTQFile: | ||
| """ """ | ||
| def __init__(self, file: str): | ||
| self.file = file | ||
| self.path = os.path.abspath(file) | ||
| self.basename = os.path.basename(self.path) | ||
| self.dir = os.path.dirname(self.path) | ||
| def __eq__(self, other): | ||
| return self.name == other.name | ||
| class IlluminaFile(FASTQFile): | ||
| """ """ | ||
| def __init__(self, file: str): | ||
| super().__init__(file) | ||
| @property | ||
| def name(self): | ||
| return "_".join(self.basename.split("_")[:-4]) | ||
| @property | ||
| def number(self): | ||
| return self.basename.split(".")[0].split("_")[-1] | ||
| @property | ||
| def read(self): | ||
| return self.basename.split("_")[-2] | ||
| @property | ||
| def lane(self): | ||
| return self.basename.split("_")[-3] | ||
| @property | ||
| def sample(self): | ||
| return self.basename.split("_")[-4] | ||
| class MergeGroup: | ||
| """ """ | ||
| def __init__(self, name: str, files: Iterable[FASTQFile]): | ||
| self.name = name | ||
| self.files = files | ||
| self.merged_file = None # assigned by merge function | ||
| def merge( | ||
| self, | ||
| merged_directory: str, | ||
| format: str = "fastq", | ||
| algo: str = "vsearch", | ||
| binary_path: Optional[str] = None, | ||
| merge_args: Optional[str] = None, | ||
| verbose: bool = False, | ||
| debug: bool = False, | ||
| ) -> str: | ||
| groups = self._group_by_lane() | ||
| n_groups = len(groups) | ||
| if verbose: | ||
| print(f"{self.name}") | ||
| print("-" * len(self.name)) | ||
| if n_groups > 1: | ||
| groups = tqdm(groups, desc=" - merging lanes") | ||
| # merge function | ||
| if algo.lower() == "vsearch": | ||
| merge_func = merge_fastqs_vsearch | ||
| else: | ||
| raise ValueError(f"Invalid merge algorithm: {algo}. Must be 'vsearch'.") | ||
| # merge files | ||
| merged_files = [] | ||
| for group in groups: | ||
| r1, r2 = natsorted(group, key=lambda x: x.read) | ||
| merged_file = merge_func( | ||
| r1.path, | ||
| r2.path, | ||
| os.path.join(merged_directory, f"{self.name}_{r1.lane}.{format.lower()}"), | ||
| binary_path=binary_path, | ||
| additional_args=merge_args, | ||
| output_format=format, | ||
| debug=debug, | ||
| ) | ||
| merged_files.append(merged_file) | ||
| self.merged_file = os.path.join(merged_directory, f"{self.name}.{format.lower()}") | ||
| if len(merged_files) > 1: | ||
| if verbose: | ||
| print(" - concatenating merged files") | ||
| concatenate_files(merged_files, self.merged_file) | ||
| delete_files(merged_files) | ||
| else: | ||
| rename_file(merged_files[0], self.merged_file) | ||
| return self.merged_file | ||
| def _group_by_lane(self): | ||
| lane_dict = {} | ||
| for f in self.files: | ||
| if f.lane not in lane_dict: | ||
| lane_dict[f.lane] = [] | ||
| lane_dict[f.lane].append(f) | ||
| return [l[1] for l in natsorted(lane_dict.items(), key=lambda x: x[0])] | ||
| def merge_fastqs( | ||
| files: Union[str, Iterable], | ||
| output_directory: str, | ||
| output_format: str = "fastq", | ||
| schema: str = "illumina", | ||
| algo: str = "vsearch", | ||
| binary_path: Optional[str] = None, | ||
| merge_args: Optional[str] = None, | ||
| debug: bool = False, | ||
| verbose: bool = False, | ||
| ) -> Iterable[str]: | ||
| """ | ||
| Merge paired-end fastq files. | ||
| Parameters | ||
| ---------- | ||
| files : Union[str, Iterable] | ||
| Path to a directory containing paired-end fastq files, or an iterable of paired-end fastq files. | ||
| output_directory : str | ||
| Path to the directory in which to save the merged files. | ||
| output_format : str, optional | ||
| Output format. Must be 'fastq' or 'fasta'. Default is 'fastq'. | ||
| schema : str, optional | ||
| Schema of the file names. Must be 'illumina'. Default is 'illumina'. | ||
| algo : str, optional | ||
| Algorithm to use for merging. Must be 'vsearch'. Default is 'vsearch'. | ||
| binary_path : str, optional | ||
| Path to the merge algorithm binary. If not provided, the binary packaged with abutils will be used. | ||
| merge_args : str, optional | ||
| Additional arguments (as a string) to pass to the merge function. | ||
| debug : bool, optional | ||
| If True, print debug output. Default is False. | ||
| verbose : bool, optional | ||
| If True, print verbose output. Default is False. | ||
| Returns | ||
| ------- | ||
| list | ||
| A list of paths to the merged FASTA/Q files. | ||
| """ | ||
| # process input files | ||
| if isinstance(files, str): | ||
| if not os.path.isdir(files): | ||
| err = f"The supplied file path ({files}) does not exist or is not a directory." | ||
| raise ValueError(err) | ||
| files = list_files(files, extension=[".fastq", ".fq", ".fastq.gz", ".fq.gz"]) | ||
| if schema.lower() == "illumina": | ||
| files = [IlluminaFile(f) for f in files] | ||
| else: | ||
| raise ValueError(f"Invalid schema: {schema}. Must be 'illumina'.") | ||
| # group files by sample | ||
| file_pairs = group_fastq_pairs(files, verbose=verbose) | ||
| # merge files | ||
| make_dir(output_directory) | ||
| merged_files = [] | ||
| for fp in file_pairs: | ||
| merged_file = fp.merge( | ||
| merged_directory=output_directory, | ||
| format=output_format, | ||
| algo=algo, | ||
| binary_path=binary_path, | ||
| merge_args=merge_args, | ||
| debug=debug, | ||
| verbose=verbose, | ||
| ) | ||
| merged_files.append(merged_file) | ||
| return merged_files | ||
| def group_fastq_pairs( | ||
| files: Iterable[FASTQFile], verbose: bool = False | ||
| ) -> Iterable[MergeGroup]: | ||
| """ | ||
| Group paired-end fastq files by sample name. If a sample has multiple lanes, the files will be combined. | ||
| Parameters | ||
| ---------- | ||
| files : Union[str, Iterable] | ||
| Path to a directory containing paired-end fastq files, or an iterable of paired-end fastq files. | ||
| verbose : bool, optional | ||
| If True, print verbose output. Default is False. | ||
| Returns | ||
| ------- | ||
| list | ||
| A list of MergeGroup objects. | ||
| """ | ||
| group_dict = {} | ||
| for f in files: | ||
| if f.name not in group_dict: | ||
| group_dict[f.name] = [] | ||
| group_dict[f.name].append(f) | ||
| groups = [ | ||
| MergeGroup(name, group_files) | ||
| for name, group_files in group_dict.items() | ||
| ] | ||
| return groups | ||
| def merge_fastqs_vsearch( | ||
| forward: str, | ||
| reverse: str, | ||
| merged_file: str, | ||
| output_format: str = "fasta", | ||
| binary_path: Optional[str] = None, | ||
| additional_args: Optional[str] = None, | ||
| debug: bool = False, | ||
| ) -> str: | ||
| """ | ||
| Merge paired-end reads using vsearch. | ||
| Parameters | ||
| ---------- | ||
| forward : str | ||
| Path to the forward read file. | ||
| reverse : str | ||
| Path to the reverse read file. | ||
| merged_file : str | ||
| Path to the merged read file. | ||
| output_format : str, optional | ||
| Output format. Must be 'fasta' or 'fastq'. Default is 'fasta'. | ||
| binary_path : str, optional | ||
| Path to a vsearch binary. If not provided, the vsearch binary packaged with abutils will be used. | ||
| additional_args : str, optional | ||
| Additional arguments (as a string) to pass to vsearch. | ||
| debug : bool, optional | ||
| If True, print vsearch stdout and stderr. Default is False. | ||
| Returns | ||
| ------- | ||
| str | ||
| Path to the merged read file. | ||
| """ | ||
| # validate input files | ||
| if not os.path.isfile(forward): | ||
| err = f"The supplied forward read file path ({forward}) does not exist or is not a file." | ||
| raise ValueError(err) | ||
| if not os.path.isfile(reverse): | ||
| err = f"The supplied reverse read file path ({reverse}) does not exist or is not a file." | ||
| raise ValueError(err) | ||
| # make output directory | ||
| out_dir = os.path.dirname(merged_file) | ||
| make_dir(out_dir) | ||
| # get the vsearch binary | ||
| if binary_path is None: | ||
| binary_path = get_binary_path("vsearch") | ||
| # compile the vsearch command | ||
| cmd = f"{binary_path} --fastq_mergepairs {forward} --reverse {reverse}" | ||
| if output_format.lower() == "fasta": | ||
| cmd += " --fastaout {merged_file}" | ||
| elif output_format.lower() == "fastq": | ||
| cmd += " --fastqout {merged_file}" | ||
| else: | ||
| err = f"Invalid output format: {output_format}. Must be 'fasta' or 'fastq'." | ||
| raise ValueError(err) | ||
| if additional_args is not None: | ||
| cmd += f" {additional_args}" | ||
| # merge reads | ||
| p = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE) | ||
| stdout, stderr = p.communicate() | ||
| if debug: | ||
| print(stdout.decode()) | ||
| print(stderr.decode()) | ||
| if p.returncode != 0: | ||
| err = f"Error merging reads with vsearch: {stderr.decode()}" | ||
| raise ValueError(err) | ||
| return merged_file |
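| # Illustrative usage (not part of the original module): merging a directory of | ||
| # Illumina-named paired-end FASTQs with the bundled vsearch binary. The input | ||
| # and output directory paths are hypothetical. | ||
| # | ||
| #     from abutils.tools.preprocessing import merge_fastqs | ||
| # | ||
| #     merged = merge_fastqs( | ||
| #         "/path/to/raw_fastqs",  # e.g. sample1_S1_L001_R1_001.fastq.gz, etc. | ||
| #         output_directory="/path/to/merged", | ||
| #         output_format="fastq", | ||
| #     ) | ||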
| Metadata-Version: 2.1 | ||
| Name: abutils | ||
| Version: 0.4.16 | ||
| Version: 0.4.17 | ||
| Summary: Utilities for analysis of adaptive immune receptor repertoire (AIRR) data | ||
@@ -25,2 +25,3 @@ Home-page: https://github.com/briney/abutils | ||
| Requires-Dist: celery | ||
| Requires-Dist: dnachisel | ||
| Requires-Dist: ete3 | ||
@@ -27,0 +28,0 @@ Requires-Dist: fastcluster |
@@ -5,2 +5,3 @@ abstar>=0.6.3 | ||
| celery | ||
| dnachisel | ||
| ete3 | ||
@@ -7,0 +8,0 @@ fastcluster |
@@ -8,2 +8,3 @@ LICENSE | ||
| abutils/__init__.py | ||
| abutils/bin.py | ||
| abutils/cl.py | ||
@@ -22,2 +23,4 @@ abutils/io.py | ||
| abutils/bin/cdhit_linux_amd64 | ||
| abutils/bin/fastp_darwin | ||
| abutils/bin/fastp_linux | ||
| abutils/bin/fasttree_darwin_amd64 | ||
@@ -185,2 +188,3 @@ abutils/bin/fasttree_darwin_arm64 | ||
| abutils/tests/test_clonify.py | ||
| abutils/tests/test_cloning.py | ||
| abutils/tests/test_cluster.py | ||
@@ -195,4 +199,6 @@ abutils/tests/test_color.py | ||
| abutils/tools/clonify.py | ||
| abutils/tools/cloning.py | ||
| abutils/tools/cluster.py | ||
| abutils/tools/phylo.py | ||
| abutils/tools/preprocessing.py | ||
| abutils/tools/similarity.py | ||
@@ -199,0 +205,0 @@ abutils/utils/__init__.py |
@@ -6,3 +6,3 @@ # from .core import * | ||
| from . import cl, io, pl, tl | ||
| # from . import bin, cl, io, pl, tl | ||
| from . import cl as color | ||
@@ -9,0 +9,0 @@ from .core import lineage, pair, sequence |
@@ -36,2 +36,3 @@ #!/usr/bin/env python | ||
| import dnachisel as dc | ||
| import pandas as pd | ||
@@ -145,2 +146,3 @@ from Bio import SeqIO | ||
| self._reverse_complement = None | ||
| self._codon_optimized = None | ||
| self._strand = None | ||
@@ -321,2 +323,12 @@ self.id_key = id_key | ||
| @property | ||
| def codon_optimized(self): | ||
| if self._codon_optimized is None: | ||
| self._codon_optimized = self.codon_optimize(as_string=True) | ||
| return self._codon_optimized | ||
| @codon_optimized.setter | ||
| def codon_optimized(self, val): | ||
| self._codon_optimized = val | ||
| @property | ||
| def annotations(self): | ||
@@ -369,2 +381,44 @@ """ | ||
| def codon_optimize( | ||
| self, | ||
| sequence_key: str = "sequence_aa", | ||
| id_key: str = "sequence_id", | ||
| frame: Optional[int] = None, | ||
| as_string: bool = True, | ||
| ) -> Union[str, "Sequence"]: | ||
| """ | ||
| Codon optimize a sequence. | ||
| Parameters | ||
| ---------- | ||
| sequence_key : str, default="sequence_aa" | ||
| Name of the annotation field containing the sequence to be codon optimized. | ||
| If not provided, ``Sequence.sequence`` is used. | ||
| id_key : str, default="sequence_id" | ||
| Name of the annotation field containing the sequence id. | ||
| If not provided, ``Sequence.id`` is used. | ||
| frame : int, default=1 | ||
| Reading frame to translate. Default is ``1``. | ||
| as_string : bool, default=True | ||
| If ``True``, the optimized sequence will be returned as a ``str``. | ||
| If ``False``, the optimized sequence will be returned as a ``Sequence`` object. | ||
| Returns | ||
| ------- | ||
| Union[str, Sequence] | ||
| Codon-optimized sequence as a ``str`` (if ``as_string`` is ``True``) or | ||
| ``Sequence`` object (if ``as_string`` is ``False``). | ||
| """ | ||
| return codon_optimize( | ||
| sequence=self, | ||
| sequence_key=sequence_key, | ||
| id_key=id_key, | ||
| frame=frame, | ||
| as_string=as_string, | ||
| ) | ||
| def as_fasta( | ||
@@ -465,3 +519,3 @@ self, name_field: Optional[str] = None, seq_field: Optional[str] = None | ||
| if id is None: | ||
| id = uuid.uuid4() | ||
| id = str(uuid.uuid4()) | ||
| self.sequence = str(seq).upper() | ||
@@ -586,2 +640,3 @@ self.id = id | ||
| translated : str | ||
| Translated sequence. | ||
@@ -597,3 +652,6 @@ """ | ||
| return None | ||
| start = (frame % 3) - 1 | ||
| # start = (frame % 3 or 3) - 1 # too clever by half | ||
| if frame not in range(1, 4): | ||
| raise ValueError(f"Invalid frame: {frame}. Must be 1, 2 or 3.") | ||
| start = frame - 1 | ||
| end = len(seq) - (len(seq[start:]) % 3) | ||
@@ -612,2 +670,98 @@ seq = seq[start:end] | ||
| def codon_optimize( | ||
| sequence, | ||
| sequence_key: Optional[str] = None, | ||
| id_key: Optional[str] = None, | ||
| frame: Optional[int] = None, | ||
| as_string: bool = False, | ||
| in_place: bool = False, | ||
| ) -> Union[str, "Sequence"]: | ||
| """ | ||
| Optimizes the codons of a nucleotide or amino acid sequence. | ||
| Parameters | ||
| ---------- | ||
| sequence : Union[str, Sequence] | ||
| Nucleotide or amino acid sequence to be optimized. | ||
| sequence_key : str, default=None | ||
| Name of the annotation field containing the sequence to be optimized. | ||
| If not provided, ``sequence.sequence`` is used. | ||
| id_key : str, default=None | ||
| Name of the annotation field containing the sequence ID. | ||
| If not provided, ``sequence.id`` is used. | ||
| frame : int, default=1 | ||
| Reading frame to translate. Default is ``1``. | ||
| .. note:: | ||
| ``frame`` is ignored if the input sequence is an amino acid sequence. | ||
| as_string : bool, default=False | ||
| If ``True``, the optimized sequence will be returned as a ``str``. | ||
| If ``False``, the optimized sequence will be returned as a ``Sequence`` object. | ||
| in_place : bool, default=False | ||
| If ``True``, the input ``Sequence`` object will be returned with the optimized sequence populating | ||
| the ``sequence.codon_optimized`` property. If ``False``, the optimized sequence will be returned | ||
| as a ``str`` (if ``as_string`` is ``True``) or a ``Sequence`` object (if ``as_string`` is ``False``). | ||
| Returns | ||
| ------- | ||
| Union[str, Sequence] | ||
| If ``in_place`` is ``True``, the input ``Sequence`` object will be returned with the optimized sequence populating | ||
| the ``Sequence.sequence`` property. | ||
| If ``as_string`` is ``True``, the optimized sequence will be returned as a ``str``. | ||
| If ``as_string`` is ``False``, the optimized sequence will be returned as a new ``Sequence`` object. | ||
| """ | ||
| # process input | ||
| if not isinstance(sequence, Sequence): | ||
| sequence = Sequence(sequence) | ||
| if sequence_key is not None: | ||
| seq = ( | ||
| sequence[sequence_key] | ||
| if sequence[sequence_key] is not None | ||
| else sequence.sequence | ||
| ) | ||
| else: | ||
| seq = sequence.sequence | ||
| if id_key is not None: | ||
| name = sequence[id_key] | ||
| else: | ||
| name = sequence.id | ||
| # get DNA sequence (reverse-translate if the input looks like an amino acid sequence) | ||
| if not all([res.upper() in ["A", "C", "G", "T", "N", "-"] for res in seq]): | ||
| dna_seq = dc.reverse_translate(seq) | ||
| else: | ||
| frame = frame if frame is not None else 1 | ||
| if frame not in range(1, 4): | ||
| raise ValueError(f"Invalid frame: {frame}. Must be 1, 2 or 3.") | ||
| start = frame - 1 | ||
| dna_seq = seq[start:] | ||
| # optimize codons | ||
| problem = dc.DnaOptimizationProblem( | ||
| sequence=dna_seq, | ||
| constraints=[ | ||
| dc.EnforceTranslation(), | ||
| dc.EnforceGCContent(maxi=0.56), | ||
| dc.EnforceGCContent(maxi=0.64, window=100), | ||
| dc.UniquifyAllKmers(10), | ||
| ], | ||
| objectives=[dc.CodonOptimize(species="h_sapiens")], | ||
| logger=None, | ||
| ) | ||
| problem.resolve_constraints(final_check=True) | ||
| problem.optimize() | ||
| if as_string: | ||
| return str(problem.sequence) | ||
| if in_place: | ||
| sequence.sequence = str(problem.sequence) | ||
| return sequence | ||
| return Sequence(problem.sequence, id=name) | ||
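| # Illustrative usage (not part of the original module): codon optimization of an | ||
| # amino acid sequence, via the module-level function or the Sequence property. | ||
| # | ||
| #     from abutils.core.sequence import Sequence, codon_optimize | ||
| # | ||
| #     seq = Sequence("RINEYLA") | ||
| #     dna = codon_optimize(seq, as_string=True)  # human codon-optimized DNA | ||
| #     len(dna) == 3 * len("RINEYLA")             # True | ||
| #     seq.codon_optimized  # equivalent optimized DNA, computed once and cached on the object | ||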
| def read_json( | ||
@@ -1044,2 +1198,7 @@ json_file: str, | ||
| index: bool = False, | ||
| properties: Optional[Iterable[str]] = None, | ||
| drop_na_columns: bool = True, | ||
| order: Optional[Iterable[str]] = None, | ||
| exclude: Optional[Union[str, Iterable[str]]] = None, | ||
| leading: Optional[Union[str, Iterable[str]]] = None, | ||
| ) -> None: | ||
@@ -1070,10 +1229,55 @@ """ | ||
| properties : list, default=None | ||
| A list of properties to be included in the CSV file. | ||
| drop_na_columns : bool, default=True | ||
| If ``True``, columns with all ``NaN`` values will be dropped from the CSV file. | ||
| Default is ``True``. | ||
| order : list, default=None | ||
| A list of fields in the order they should appear in the CSV file. | ||
| exclude : str or list, default=None | ||
| Field or list of fields to be excluded from the CSV file. | ||
| leading : str or list, default=None | ||
| Field or list of fields to appear first in the CSV file. Supersedes ``order``, so | ||
| if both are provided, fields in ``leading`` will appear first in the CSV file and | ||
| remaining fields will appear in the order provided in ``order``. | ||
| """ | ||
| d = [] | ||
| data = [] | ||
| for s in sequences: | ||
| if not s.annotations: | ||
| d.append({"sequence_id": s.id, "sequence": s.sequence}) | ||
| d = {"sequence_id": s.id, "sequence": s.sequence} | ||
| else: | ||
| d.append(s.annotations) | ||
| df = pd.DataFrame(d) | ||
| d = s.annotations | ||
| if properties is not None: | ||
| for prop in properties: | ||
| try: | ||
| d[prop] = getattr(s, prop) | ||
| except AttributeError: | ||
| continue | ||
| data.append(d) | ||
| df = pd.DataFrame(data) | ||
| # drop NaN | ||
| if drop_na_columns: | ||
| df.dropna(axis=1, how="all", inplace=True) | ||
| # excluded columns | ||
| if exclude is not None: | ||
| if isinstance(exclude, str): | ||
| exclude = [exclude] | ||
| cols = [c for c in df.columns.values if c not in exclude] | ||
| df = df[cols] | ||
| # reorder | ||
| if order is not None: | ||
| cols = [o for o in order if o in df.columns.values] | ||
| df = df[cols] | ||
| if leading is not None: | ||
| if isinstance(leading, str): | ||
| leading = [leading] | ||
| leading = [l for l in leading if l in df.columns.values] | ||
| cols = leading + [c for c in df.columns.values if c not in leading] | ||
| df = df[cols] | ||
| df.to_csv(csv_file, sep=sep, index=index, columns=columns, header=header) |
@@ -25,3 +25,6 @@ #!/usr/bin/env python | ||
| import glob | ||
| import os | ||
| import sys | ||
| from typing import Iterable, Union | ||
@@ -40,5 +43,123 @@ from .core.sequence import ( | ||
| from .utils.convert import abi_to_fasta | ||
| from .utils.pipeline import list_files, make_dir | ||
| # from .utils.pipeline import list_files, make_dir | ||
| def make_dir(directory: str) -> None: | ||
| """ | ||
| Makes a directory, if it doesn't already exist. | ||
| Parameters | ||
| ---------- | ||
| directory : str | ||
| Path to a directory. | ||
| """ | ||
| if not os.path.exists(directory): | ||
| os.makedirs(os.path.abspath(directory)) | ||
| def list_files( | ||
| directory: str, extension: Union[str, Iterable, None] = None | ||
| ) -> Iterable[str]: | ||
| """ | ||
| Lists files in a given directory. | ||
| Parameters | ||
| ---------- | ||
| directory : str | ||
| Path to a directory. | ||
| extension : str | ||
| If supplied, only files that end with the specified extension(s) will be returned. Can be either | ||
| a string or a list of strings. Extension evaluation is case-insensitive and can match complex | ||
| extensions (e.g. '.fastq.gz'). Default is ``None``, which returns all files in the directory, | ||
| regardless of extension. | ||
| Returns | ||
| ------- | ||
| Iterable[str] | ||
| """ | ||
| if os.path.isdir(directory): | ||
| expanded_dir = os.path.expanduser(directory) | ||
| files = sorted(glob.glob(expanded_dir + "/*")) | ||
| else: | ||
| files = [ | ||
| directory, | ||
| ] | ||
| if extension is not None: | ||
| if isinstance(extension, str): | ||
| extension = [ | ||
| extension, | ||
| ] | ||
| files = [ | ||
| f | ||
| for f in files | ||
| if any([f.lower().endswith(e.lower()) for e in extension]) | ||
| ] | ||
| return files | ||
| def rename_file(file: str, new_name: str) -> None: | ||
| """ | ||
| Renames a file. | ||
| Parameters | ||
| ---------- | ||
| file : str | ||
| Path to the file to be renamed. | ||
| new_name : str | ||
| New name for the file. | ||
| """ | ||
| os.rename(file, new_name) | ||
| def delete_files(files: Union[str, Iterable]) -> None: | ||
| """ | ||
| Deletes files. | ||
| Parameters | ||
| ---------- | ||
| files : Union[str, Iterable] | ||
| Path to a file or an iterable of file paths. | ||
| """ | ||
| if isinstance(files, str): | ||
| files = [ | ||
| files, | ||
| ] | ||
| for f in files: | ||
| if os.path.exists(f): | ||
| os.remove(f) | ||
| def concatenate_files(files: Iterable[str], output_file: str) -> None: | ||
| """ | ||
| Concatenates multiple files into a single file. | ||
| Parameters | ||
| ---------- | ||
| files : Iterable[str] | ||
| Iterable of file paths. | ||
| output_file : str | ||
| Path to the output file. | ||
| """ | ||
| with open(output_file, "w") as outfile: | ||
| for fname in files: | ||
| with open(fname) as infile: | ||
| for line in infile: | ||
| outfile.write(line) | ||
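| # Illustrative usage (not part of the original module): listing FASTQ files by | ||
| # extension and concatenating them into a single output file. Paths are | ||
| # hypothetical. | ||
| # | ||
| #     from abutils.io import concatenate_files, list_files, make_dir | ||
| # | ||
| #     fastqs = list_files("/data/run1", extension=[".fastq", ".fq"]) | ||
| #     make_dir("/data/combined") | ||
| #     concatenate_files(fastqs, "/data/combined/all_reads.fastq") | ||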
| def read_sequences( | ||
@@ -45,0 +166,0 @@ file=None, |
@@ -5,8 +5,8 @@ #!usr/env/python | ||
| import os | ||
| import pytest | ||
| import tempfile | ||
| import pytest | ||
| from Bio.SeqRecord import SeqRecord | ||
| from ..core.sequence import Sequence, read_fasta, read_airr, to_fasta | ||
| from ..core.sequence import Sequence, read_airr, read_fasta, to_fasta | ||
@@ -275,1 +275,27 @@ | ||
| assert ">seq1\nATCG\n>seq2\nGCTA\n>seq3\nTTTT" in fasta | ||
| # ------------------------------------------- | ||
| # codon optimization | ||
| # ------------------------------------------- | ||
| def test_codon_optimize_returns_string(): | ||
| seq = Sequence("RINEYLA") | ||
| optimized_seq = seq.codon_optimize(as_string=True) | ||
| assert isinstance(optimized_seq, str) | ||
| assert len(optimized_seq) == len(seq) * 3 | ||
| def test_codon_optimize_returns_sequence_object(): | ||
| seq = Sequence("RINEYLA") | ||
| optimized_seq = seq.codon_optimize(as_string=False) | ||
| assert isinstance(optimized_seq, Sequence) | ||
| assert len(optimized_seq.sequence) == len(seq) * 3 | ||
| assert optimized_seq.translate() == seq.sequence | ||
| def test_codon_optimize_with_frame(): | ||
| seq = Sequence("ATGCATGCATGCATGC") | ||
| with pytest.raises(ValueError): | ||
| seq.codon_optimize(frame=4) |
@@ -27,5 +27,6 @@ #!/usr/bin/env python | ||
| from .core.pair import assign_pairs | ||
| from .core.sequence import reverse_complement, translate | ||
| from .core.sequence import codon_optimize, reverse_complement, translate | ||
| from .tools.alignment import * | ||
| from .tools.clonify import clonify | ||
| from .tools.cloning import * | ||
| from .tools.cluster import * | ||
@@ -32,0 +33,0 @@ from .tools.phylo import * |
@@ -1,16 +0,37 @@ | ||
| from collections import Counter | ||
| #!/usr/bin/env python | ||
| # filename: clonify.py | ||
| # | ||
| # | ||
| # Copyright (c) 2023 Bryan Briney | ||
| # License: The MIT license (http://opensource.org/licenses/MIT) | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a copy of this software | ||
| # and associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| # including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| # sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| # furnished to do so, subject to the following conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included in all copies or | ||
| # substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING | ||
| # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| # | ||
| import itertools | ||
| import sys | ||
| from collections import Counter | ||
| from typing import Iterable, Union | ||
| import fastcluster as fc | ||
| import numpy as np | ||
| import pandas as pd | ||
| import numpy as np | ||
| from Levenshtein import distance | ||
| import fastcluster as fc | ||
| from mnemonic import Mnemonic | ||
| from scipy.cluster.hierarchy import fcluster | ||
| from mnemonic import Mnemonic | ||
| from ..core.sequence import Sequence | ||
@@ -17,0 +38,0 @@ from ..tools import cluster |
@@ -37,2 +37,3 @@ #!/usr/bin/env python | ||
| from ..bin import get_path as get_binary_path | ||
| from ..core.sequence import Sequence, read_fasta, to_fasta | ||
@@ -226,2 +227,7 @@ from ..utils.decorators import lazy_property | ||
| iddef: int = 0, | ||
| cluster_mode: str = "2", | ||
| cov_mode: str = "0", | ||
| coverage: float = 0.8, | ||
| alignment_mode: str = "3", | ||
| seq_id_mode: str = "1", | ||
| vsearch_bin: str = None, | ||
@@ -232,2 +238,3 @@ mmseqs_bin: str = None, | ||
| seq_key: Optional[str] = None, | ||
| threads: Optional[int] = None, | ||
| strand: str = "plus", | ||
@@ -285,2 +292,32 @@ as_dict: bool = False, | ||
| cluster_mode : str, default="2" | ||
| Clustering mode, as implemented by MMseqs2. Options are: | ||
| 0. greedy set cover | ||
| 1. connected component | ||
| 2. greedy incremental (CD-HIT-like) | ||
| cov_mode : str, default="0" | ||
| Coverage mode, as implemented by MMseqs2. Options are: | ||
| 0. bidirectional | ||
| 1. target coverage | ||
| 2. query coverage | ||
| 3. target-in-query length coverage | ||
| coverage : float, default=0.8 | ||
| Coverage threshold for clustering with MMseqs2. Must be between 0 and 1. | ||
| alignment_mode : str, default="3" | ||
| Alignment mode, as implemented by MMseqs2. Options are: | ||
| 0. automatic | ||
| 1. only score and end_pos | ||
| 2. also start_pos and cov | ||
| 3. also seq.id | ||
| 4. only ungapped alignment | ||
| seq_id_mode : str, default="1" | ||
| Sequence ID mode, as implemented by MMseqs2. Options are: | ||
| 0. alignment length | ||
| 1. shorter sequence length | ||
| 2. longer sequence length | ||
| vsearch_bin : str, optional | ||
@@ -359,2 +396,8 @@ Path to a VSEARCH executable. If not provided, the VSEARCH binary bundled | ||
| threshold=threshold, | ||
| cluster_mode=cluster_mode, | ||
| cov_mode=cov_mode, | ||
| coverage=coverage, | ||
| alignment_mode=alignment_mode, | ||
| seq_id_mode=seq_id_mode, | ||
| threads=threads, | ||
| temp_dir=temp_dir, | ||
@@ -371,2 +414,3 @@ mmseqs_bin=mmseqs_bin, | ||
| cdhit_bin=cdhit_bin, | ||
| threads=0 if threads is None else threads, | ||
| as_dict=True, | ||
@@ -383,2 +427,3 @@ debug=debug, | ||
| strand=strand, | ||
| threads=threads, | ||
| as_dict=True, | ||
@@ -409,2 +454,3 @@ debug=debug, | ||
| strand: str = "plus", | ||
| threads: Optional[int] = None, | ||
| as_dict: bool = False, | ||
@@ -478,6 +524,7 @@ debug: bool = False, | ||
| if vsearch_bin is None: | ||
| mod_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| system = platform.system().lower() | ||
| machine = platform.machine().lower().replace("x86_64", "amd64") | ||
| vsearch_bin = os.path.join(mod_dir, f"bin/vsearch_{system}_{machine}") | ||
| vsearch_bin = get_binary_path("vsearch") | ||
| # mod_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| # system = platform.system().lower() | ||
| # machine = platform.machine().lower().replace("x86_64", "amd64") | ||
| # vsearch_bin = os.path.join(mod_dir, f"bin/vsearch_{system}_{machine}") | ||
| # do clustering | ||
@@ -492,2 +539,4 @@ vsearch_cmd = f"{vsearch_bin} --cluster_fast {fasta_file}" | ||
| vsearch_cmd += f" --strand {strand}" | ||
| if threads is not None: | ||
| vsearch_cmd += f" --threads {threads}" | ||
| p = sp.Popen(vsearch_cmd, stdout=sp.PIPE, stderr=sp.PIPE, shell=True) | ||
@@ -531,2 +580,8 @@ stdout, stderr = p.communicate() | ||
| threshold: float = 0.975, | ||
| cluster_mode: str = "2", | ||
| cov_mode: str = "0", | ||
| coverage: float = 0.8, | ||
| alignment_mode: str = "3", | ||
| seq_id_mode: str = "1", | ||
| threads: Optional[int] = None, | ||
| temp_dir: str = "/tmp", | ||
@@ -549,2 +604,42 @@ mmseqs_bin: str = None, | ||
| cluster_mode : str, default="2" | ||
| Clustering mode. Options are ``"1"``, ``"2"``, or ``"3"``. | ||
| - ``"1"``: greedy set cover | ||
| - ``"2"``: connected component | ||
| - ``"3"``: greedy incremental (CD-HIT-like) | ||
| See the `MMseqs2 documentation <https://mmseqs.com/latest/userguide.html#clustering>`_ | ||
| for more information. | ||
| cov_mode : str, default="0" | ||
| Coverage mode. Options are ``"0"``, ``"1"``, ``"2"``, or ``"3"``. | ||
| - ``"0"``: bidirectional | ||
| - ``"1"``: target coverage | ||
| - ``"2"``: query coverage | ||
| - ``"3"``: target-in-query length coverage | ||
| See the `MMseqs2 documentation <https://mmseqs.com/latest/userguide.html#clustering>`_ | ||
| for more information. | ||
| coverage : float, default=0.8 | ||
| Coverage threshold for clustering. Must be between 0 and 1. | ||
| alignment_mode : str, default="3" | ||
| Alignment mode. Options are ``"0"``, ``"1"``, ``"2"``, ``"3"``, or ``"4"``. | ||
| - ``"0"``: automatic | ||
| - ``"1"``: only score and end_pos | ||
| - ``"2"``: also start_pos and cov | ||
| - ``"3"``: also seq.id | ||
| - ``"4"``: only ungapped alignment | ||
| See the `MMseqs2 documentation <https://mmseqs.com/latest/userguide.html#clustering>`_ | ||
| for more information. | ||
| seq_id_mode : str, default="1" | ||
| Sequence ID mode. Options are ``"0"``, ``"1"``, or ``"2"``. | ||
| - ``"0"``: alignment length | ||
| - ``"1"``: shorter sequence length | ||
| - ``"2"``: longer sequence length | ||
| threads : int, default=None | ||
| Number of threads to use for clustering. If not provided, the number of threads | ||
| will be determined by MMseqs2. | ||
| temp_dir : str, default="/tmp" | ||
@@ -592,6 +687,7 @@ Path to a directory for temporary storage of clustering files. | ||
| if mmseqs_bin is None: | ||
| mod_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| system = platform.system().lower() | ||
| machine = platform.machine().lower().replace("x86_64", "amd64") | ||
| mmseqs_bin = os.path.join(mod_dir, f"bin/mmseqs_{system}_{machine}") | ||
| mmseqs_bin = get_binary_path("mmseqs") | ||
| # mod_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| # system = platform.system().lower() | ||
| # machine = platform.machine().lower().replace("x86_64", "amd64") | ||
| # mmseqs_bin = os.path.join(mod_dir, f"bin/mmseqs_{system}_{machine}") | ||
| # build the mmseqs DB | ||
@@ -609,2 +705,9 @@ db_cmd = f"{mmseqs_bin} createdb {fasta_file} {db_file}" | ||
| cluster_cmd += f" --min-seq-id {threshold}" | ||
| cluster_cmd += f" --cluster-mode {cluster_mode}" | ||
| cluster_cmd += f" --cov-mode {cov_mode}" | ||
| cluster_cmd += f" -c {coverage}" | ||
| cluster_cmd += f" --alignment-mode {alignment_mode}" | ||
| cluster_cmd += f" --seq-id-mode {seq_id_mode}" | ||
| if threads is not None: | ||
| cluster_cmd += f" --threads {threads}" | ||
| p = sp.Popen(cluster_cmd, stdout=sp.PIPE, stderr=sp.PIPE, shell=True) | ||
@@ -657,2 +760,3 @@ stdout, stderr = p.communicate() | ||
| cdhit_bin: str = None, | ||
| threads: int = 0, | ||
| as_dict: bool = False, | ||
@@ -680,2 +784,6 @@ debug: bool = False, | ||
| threads : int, default=0 | ||
| Number of threads to use for clustering. If not provided, all available CPU cores | ||
| will be used. | ||
| id_key : str, default=None | ||
@@ -715,6 +823,7 @@ Key to retrieve the sequence ID. If not provided or missing, ``Sequence.id`` is used. | ||
| if cdhit_bin is None: | ||
| mod_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| system = platform.system().lower() | ||
| machine = platform.machine().lower().replace("x86_64", "amd64") | ||
| cdhit_bin = os.path.join(mod_dir, f"bin/cdhit_{system}_{machine}") | ||
| cdhit_bin = get_binary_path("cdhit") | ||
| # mod_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| # system = platform.system().lower() | ||
| # machine = platform.machine().lower().replace("x86_64", "amd64") | ||
| # cdhit_bin = os.path.join(mod_dir, f"bin/cdhit_{system}_{machine}") | ||
| # run CD-HIT | ||
@@ -729,4 +838,4 @@ # clustering options are as follows: | ||
| cluster_cmd = f"{cdhit_bin} -i {fasta_file} -o {output_file}" | ||
| cluster_cmd += f" -c {threshold} -n {wordsize}" | ||
| cluster_cmd += " -d 0 -l 4 -T 0 -M 0" | ||
| cluster_cmd += f" -c {threshold} -n {wordsize} -T {threads}" | ||
| cluster_cmd += " -d 0 -l 4 -M 0" | ||
| p = sp.Popen(cluster_cmd, stdout=sp.PIPE, stderr=sp.PIPE, shell=True) | ||
@@ -733,0 +842,0 @@ stdout, stderr = p.communicate() |
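| # Illustrative usage (not part of this diff): the top-level ``cluster`` call now | ||
| # passes the MMseqs2-specific options (cluster_mode, cov_mode, coverage, | ||
| # alignment_mode, seq_id_mode, threads) through to the MMseqs2 backend. A minimal | ||
| # sketch, assuming ``sequences`` is a list of ``abutils.Sequence`` objects and that | ||
| # MMseqs2 is the selected clustering backend: | ||
| # | ||
| #     from abutils.tools.cluster import cluster | ||
| # | ||
| #     clusters = cluster(sequences, threshold=0.975, coverage=0.8, threads=4) | ||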
@@ -44,2 +44,3 @@ #!/usr/bin/env python | ||
| from ..bin import get_path as get_binary_path | ||
| from ..core.sequence import Sequence, read_fasta, to_fasta | ||
@@ -113,10 +114,11 @@ from ..tools.cluster import Cluster, cluster | ||
| if fasttree_bin is None: | ||
| mod_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| # fasttree_bin = os.path.join( | ||
| # mod_dir, f"bin/fasttree_{platform.system().lower()}" | ||
| # ) | ||
| system = platform.system().lower() | ||
| machine = platform.machine().lower() | ||
| fasttree_bin = os.path.join(mod_dir, f"bin/fasttree_{system}_{machine}") | ||
| fasttree_bin = fasttree_bin.replace("x86_64", "amd64") | ||
| fasttree_bin = get_binary_path("fasttree") | ||
| # mod_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| # # fasttree_bin = os.path.join( | ||
| # # mod_dir, f"bin/fasttree_{platform.system().lower()}" | ||
| # # ) | ||
| # system = platform.system().lower() | ||
| # machine = platform.machine().lower() | ||
| # fasttree_bin = os.path.join(mod_dir, f"bin/fasttree_{system}_{machine}") | ||
| # fasttree_bin = fasttree_bin.replace("x86_64", "amd64") | ||
| # make output directory if necessary | ||
@@ -123,0 +125,0 @@ if tree_file is None: |
@@ -33,6 +33,5 @@ #!/usr/bin/env python | ||
| from ..core.pair import Pair | ||
| from ..core.sequence import Sequence | ||
| from ..core.pair import Pair | ||
| __all__ = [ | ||
@@ -153,2 +152,28 @@ "RepertoireSimilarity", | ||
| def __iter__(self): | ||
| for s in self.similarities: | ||
| yield s | ||
| def __len__(self): | ||
| return len(self.similarities) | ||
| def __repr__(self): | ||
| return "<RepertoireSimilarities: {} comparisons>".format(len(self)) | ||
| def __str__(self): | ||
| return "<RepertoireSimilarities: {} comparisons>".format(len(self)) | ||
| def __add__(self, other): | ||
| if isinstance(other, RepertoireSimilarity): | ||
| self.add(other) | ||
| return self | ||
| elif isinstance(other, RepertoireSimilarities): | ||
| for s in other: | ||
| self.add(s) | ||
| return self | ||
| else: | ||
| raise TypeError( | ||
| "RepertoireSimilarities can only be added to RepertoireSimilarity or RepertoireSimilarities objects." | ||
| ) | ||
| @property | ||
@@ -166,3 +191,3 @@ def df(self): | ||
| @property | ||
| def means(self): | ||
| def means(self) -> np.array: | ||
| """ | ||
@@ -174,3 +199,3 @@ Returns an array of mean similarity values, one for each pairwise comparison. | ||
| @property | ||
| def medians(self): | ||
| def medians(self) -> np.array: | ||
| """ | ||
@@ -182,3 +207,3 @@ Returns an array of median similarity values, one for each pairwise comparison. | ||
| @property | ||
| def mins(self): | ||
| def mins(self) -> np.array: | ||
| """ | ||
@@ -190,3 +215,3 @@ Returns an array of minimum similarity values, one for each pairwise comparison. | ||
| @property | ||
| def maxs(self): | ||
| def maxs(self) -> np.array: | ||
| """ | ||
@@ -198,3 +223,3 @@ Returns an array of maximum similarity values, one for each pairwise comparison. | ||
| @property | ||
| def stds(self): | ||
| def stds(self) -> np.array: | ||
| """ | ||
@@ -206,3 +231,3 @@ Returns an array of standard deviations, one for each pairwise comparison. | ||
| @property | ||
| def sems(self): | ||
| def sems(self) -> np.array: | ||
| """ | ||
@@ -213,3 +238,7 @@ Returns an array of standard errors of the mean, one for each pairwise comparison. | ||
| def squareform(self, method=None, agg=np.mean): | ||
| def squareform( | ||
| self, | ||
| # method=None, | ||
| agg=np.mean, | ||
| ) -> pd.DataFrame: | ||
| """ | ||
@@ -233,4 +262,4 @@ Returns a squareform representation of the similarity data. | ||
| _df = self.df[(self.df["sample1"] == s1) & (self.df["sample2"] == s2)] | ||
| if method is not None: | ||
| _df = _df[_df["method"] == method] | ||
| # if method is not None: | ||
| # _df = _df[_df["method"] == method] | ||
| if _df.shape[0] == 0: | ||
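A hedged usage sketch of the revised ``squareform`` signature shown above; the object and repertoire names are illustrative, and indexing the result by name assumes the returned DataFrame is labeled with the ``sample1``/``sample2`` values:

    import numpy as np

    sq = similarities.squareform(agg=np.median)   # pandas DataFrame of aggregated pairwise values
    sq.loc["repertoire_A", "repertoire_B"]        # one aggregated similarity value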
@@ -278,5 +307,10 @@ continue | ||
| A list of repertoires. Each repertoire can be a list of ``abutils.Sequence`` | ||
| objects or ``abutils.Pair`` objects. Individual repertoires cannot mix ``Sequence`` | ||
| and ``Pair`` objects, but the list of repertoires can contain both. | ||
| objects or a list of ``abutils.Pair`` objects. | ||
| .. note:: | ||
| Individual repertoires cannot mix ``Sequence`` and ``Pair`` objects, | ||
| but the list of repertoires can contain both. For example, comparing a | ||
| repertoire of ``Sequence`` objects to a repertoire of ``Pair`` objects is | ||
| allowed. | ||
| names : list, optional | ||
@@ -287,3 +321,11 @@ A list of names for the repertoires. If ``None``, names will be generated as | ||
| method : str, optional | ||
| The similarity method to use. Default is ``"morisita-horn"``. | ||
| The similarity method to use. Options are: | ||
| - ``"morisita-horn"`` | ||
| - ``"kullback-leibler"`` | ||
| - ``"jensen-shannon"`` | ||
| - ``"jaccard"`` | ||
| - ``"bray-curtis"`` | ||
| - ``"renkonen"`` | ||
| - ``"cosine"`` | ||
| Default is ``"morisita-horn"``. | ||
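For reference (not part of the diff), ``morisita-horn`` conventionally refers to the Morisita-Horn overlap index. A minimal NumPy sketch of that conventional definition, computed over shared feature counts (e.g. clonotype or V-gene counts); the package's own implementation may differ in detail:

    import numpy as np

    def morisita_horn(x, y):
        # x, y: abundance counts over the same set of features in two repertoires
        x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
        X, Y = x.sum(), y.sum()
        d_x = (x ** 2).sum() / X ** 2
        d_y = (y ** 2).sum() / Y ** 2
        return 2 * (x * y).sum() / ((d_x + d_y) * X * Y)

    morisita_horn([10, 5, 0], [8, 6, 1])  # ≈ 0.97; identical count vectors give 1.0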
@@ -304,3 +346,4 @@ features : str, list, optional | ||
| pairs_only : bool, optional | ||
| Whether to use only paired sequences. Default is ``False``. | ||
| Whether to use only paired sequences. Requires that all repertoires contain ``Pair`` objects, | ||
| rather than ``Sequence`` objects. Default is ``False``. | ||
@@ -310,2 +353,7 @@ chain : str, optional | ||
| .. note:: | ||
| This applies to both ``Sequence`` and ``Pair`` objects. If a repertoire contains | ||
| ``Sequence`` objects from multiple chains, only sequences of the specified chain will | ||
| be used for similarity calculation, and the rest will be ignored. | ||
| force_self_comparisons : bool, optional | ||
@@ -355,3 +403,3 @@ Whether to force self-comparisons. Only used if exactly two repertoires are provided. | ||
| elif len(names) != len(repertoires): | ||
| err = f"\nThe number of names must match the number of repertoires.\n" | ||
| err = "\nThe number of names must match the number of repertoires.\n" | ||
| err += f"You provided {len(names)} names and {len(repertoires)} repertoires.\n" | ||
@@ -358,0 +406,0 @@ raise RuntimeError(err) |
@@ -31,14 +31,20 @@ #!/usr/bin/env python | ||
| import sys | ||
| from typing import Iterable, Optional | ||
| from typing import Iterable, Optional, Union | ||
| from . import log | ||
| if sys.version_info[0] > 2: | ||
| STR_TYPES = [ | ||
| str, | ||
| ] | ||
| # else: | ||
| # STR_TYPES = [str, unicode] | ||
| # for backward compatibility | ||
| def list_files(*args, **kwargs): | ||
| from ..io import list_files | ||
| return list_files(*args, **kwargs) | ||
| def make_dir(*args, **kwargs): | ||
| from ..io import make_dir | ||
| return make_dir(*args, **kwargs) | ||
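The wrappers above keep legacy call sites working while delegating to the relocated implementations in ``..io``. A minimal usage sketch; the directory and extension values are illustrative:

    make_dir("./results")                                      # forwards to abutils.io.make_dir
    fastq_paths = list_files("./data", extension=".fastq.gz")  # forwards to abutils.io.list_files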
| def initialize(log_file, project_dir=None, debug=False): | ||
@@ -82,58 +88,62 @@ """ | ||
| def make_dir(directory: str) -> None: | ||
| """ | ||
| Makes a directory, if it doesn't already exist. | ||
| # def make_dir(directory: str) -> None: | ||
| # """ | ||
| # Makes a directory, if it doesn't already exist. | ||
| Parameters | ||
| ---------- | ||
| directory : str | ||
| Path to a directory. | ||
| # Parameters | ||
| # ---------- | ||
| # directory : str | ||
| # Path to a directory. | ||
| """ | ||
| if not os.path.exists(directory): | ||
| os.makedirs(os.path.abspath(directory)) | ||
| # """ | ||
| # if not os.path.exists(directory): | ||
| # os.makedirs(os.path.abspath(directory)) | ||
| def list_files(directory: str, extension: Optional[str] = None) -> Iterable[str]: | ||
| """ | ||
| Lists files in a given directory. | ||
| # def list_files( | ||
| # directory: str, extension: Union[str, Iterable, None] = None | ||
| # ) -> Iterable[str]: | ||
| # """ | ||
| # Lists files in a given directory. | ||
| Parameters | ||
| ---------- | ||
| directory : str | ||
| Path to a directory. | ||
| # Parameters | ||
| # ---------- | ||
| # directory : str | ||
| # Path to a directory. | ||
| extension : str | ||
| If supplied, only files that contain the specified extension will be returned. | ||
| Default is ``None``, which returns all files in the directory, regardless of extension. | ||
| # extension : str | ||
| # If supplied, only files that end with the specified extension(s) will be returned. Can be either | ||
| # a string or a list of strings. Extension evaluation is case-insensitive and can match complex | ||
| # extensions (e.g. '.fastq.gz'). Default is ``None``, which returns all files in the directory, | ||
| # regardless of extension. | ||
| Returns | ||
| ------- | ||
| Iterable[str] | ||
| # Returns | ||
| # ------- | ||
| # Iterable[str] | ||
| """ | ||
| if os.path.isdir(directory): | ||
| expanded_dir = os.path.expanduser(directory) | ||
| files = sorted(glob.glob(expanded_dir + "/*")) | ||
| else: | ||
| files = [ | ||
| directory, | ||
| ] | ||
| if extension is not None: | ||
| if type(extension) in STR_TYPES: | ||
| extension = [ | ||
| extension, | ||
| ] | ||
| files = [ | ||
| f | ||
| for f in files | ||
| if any( | ||
| [ | ||
| f.split(".")[-1] in extension, | ||
| f.split(".")[-1].upper() in extension, | ||
| f.split(".")[-1].lower() in extension, | ||
| ] | ||
| ) | ||
| ] | ||
| return files | ||
| # """ | ||
| # if os.path.isdir(directory): | ||
| # expanded_dir = os.path.expanduser(directory) | ||
| # files = sorted(glob.glob(expanded_dir + "/*")) | ||
| # else: | ||
| # files = [ | ||
| # directory, | ||
| # ] | ||
| # if extension is not None: | ||
| # if isinstance(extension, str): | ||
| # extension = [ | ||
| # extension, | ||
| # ] | ||
| # files = [ | ||
| # f | ||
| # for f in files | ||
| # if any( | ||
| # [ | ||
| # any([f.lower().endswith(e.lower()) for e in extension]), | ||
| # any([f.endswith(e.upper()) for e in extension]), | ||
| # any([f.endswith(e.lower()) for e in extension]), | ||
| # ] | ||
| # ) | ||
| # ] | ||
| # return files | ||
@@ -140,0 +150,0 @@ |
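The commented-out block above records the behavioral change that accompanied moving ``list_files`` into ``abutils.io``: case-insensitive suffix matching replaces comparison of the last dot-separated token. A small illustration of the difference, with an illustrative filename:

    fname = "sample1.fastq.gz"
    exts = ["fastq.gz"]

    # previous logic: only the final dot-separated token is compared
    fname.split(".")[-1] in exts                          # "gz" not in ["fastq.gz"] -> False
    # revised logic: case-insensitive endswith, so compound extensions match
    any(fname.lower().endswith(e.lower()) for e in exts)  # True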
@@ -6,2 +6,2 @@ # Store the version here so: | ||
| __version__ = "0.4.16" | ||
| __version__ = "0.4.17" |
+2
-1
| Metadata-Version: 2.1 | ||
| Name: abutils | ||
| Version: 0.4.16 | ||
| Version: 0.4.17 | ||
| Summary: Utilities for analysis of adaptive immune receptor repertoire (AIRR) data | ||
@@ -25,2 +25,3 @@ Home-page: https://github.com/briney/abutils | ||
| Requires-Dist: celery | ||
| Requires-Dist: dnachisel | ||
| Requires-Dist: ete3 | ||
@@ -27,0 +28,0 @@ Requires-Dist: fastcluster |
+1
-0
@@ -5,2 +5,3 @@ abstar >= 0.6.3 | ||
| celery | ||
| dnachisel | ||
| ete3 | ||
@@ -7,0 +8,0 @@ fastcluster |