Latest Threat Research:SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains.Details
Socket
Book a DemoInstallSign in
Socket

mimseq

Package Overview
Dependencies
Maintainers
2
Versions
65
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

mimseq - npm Package Compare versions

Comparing version
1.3.7
to
1.3.8
+1
-1
mimseq.egg-info/PKG-INFO
Metadata-Version: 2.1
Name: mimseq
Version: 1.3.7
Version: 1.3.8
Summary: Custom high-throughput tRNA sequencing alignment and quantification pipeline based on modification induced misincorporation cDNA synthesis.

@@ -5,0 +5,0 @@ Home-page: https://github.com/nedialkova-lab/mim-tRNAseq

@@ -9,2 +9,3 @@ biopython

pybedtools
requests
statsmodels

@@ -32,3 +32,2 @@ LICENSE.txt

mimseq/data/modomics
mimseq/data/modomics_orig
mimseq/data/tRNAmatureseq.cm

@@ -35,0 +34,0 @@ mimseq/data/araTha1-eColitK/FastaHeadersforMimseq.py

@@ -97,8 +97,5 @@ #! /usr/bin/env python3

map_round = 1 #first round of mapping
# Parse tRNA and modifications, generate SNP index
modifications = os.path.dirname(os.path.realpath(__file__))
modifications += "/modifications"
coverage_bed, snp_tolerance, mismatch_dict, insert_dict, del_dict, mod_lists, Inosine_lists, Inosine_clusters, tRNA_dict, cluster_dict, cluster_perPos_mismatchMembers \
= modsToSNPIndex(trnas, trnaout, mito_trnas, plastid_trnas, modifications, name, out, double_cca, threads, snp_tolerance, cluster, cluster_id, posttrans, pretrnas, local_mod)
= modsToSNPIndex(gtRNAdb = trnas, tRNAscan_out = trnaout, mitotRNAs = mito_trnas, plastidtRNAs = plastid_trnas, experiment_name = name, out_dir = out, double_cca = double_cca, threads = threads, snp_tolerance = snp_tolerance, cluster = cluster, cluster_id = cluster_id, posttrans_mod_off = posttrans, pretrnas = pretrnas, local_mod = local_mod)
structureParser()

@@ -105,0 +102,0 @@ # Generate GSNAP indices

@@ -31,7 +31,8 @@ #!/usr/bin/env python3

def tRNAparser (gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, modifications_table, posttrans_mod_off, double_cca, pretrnas, local_mod):
def tRNAparser (gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, posttrans_mod_off, double_cca, pretrnas, local_mod):
# tRNA sequence files parser and dictionary building
# Generate modification reference table
modifications = modificationParser(modifications_table)
modifications_file, fetch = getModifications(local_mod)
modifications = modificationParser(modifications_file, fetch)
temp_name = gtRNAdb.split("/")[-1]

@@ -218,3 +219,3 @@

def getModomics(local_mod):
# Get full Modomics modified tRNA data from web
# Get full Modomics modified tRNA data from API
fetch = False

@@ -230,16 +231,136 @@ if not local_mod:

log.error("Unable to connect to Modomics database! HTTP error: {}. Check status of Modomics webpage. Using local Modomics files...".format(http_err))
modomics_path = os.path.dirname(os.path.realpath(__file__)) + '/data/modomics'
modomics = open(modomics_path, "r+", encoding = "utf-8")
modomics = openLocalModomics('/data/modomics')
except Exception as err:
log.error("Error in connecting to Modomics: {}. Using local Modomics files...".format(err))
modomics_path = os.path.dirname(os.path.realpath(__file__)) + '/data/modomics'
modomics = open(modomics_path, "r+", encoding = "utf-8")
modomics = openLocalModomics('/data/modomics')
else:
log.warning("Retrieval of Modomics database disabled. Using local files instead...")
modomics_path = os.path.dirname(os.path.realpath(__file__)) + '/data/modomics'
modomics = open(modomics_path, "r+", encoding = "utf-8")
modomics = openLocalModomics('/data/modomics')
return modomics, fetch
def modsToSNPIndex(gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, modifications_table, experiment_name, out_dir, double_cca, threads, snp_tolerance = False, cluster = False, cluster_id = 0.95, posttrans_mod_off = False, pretrnas = False, local_mod = False, search='usearch'):
def openLocalModomics(filepath):
    """Return a read-only UTF-8 text handle for a file stored beside this module.

    `filepath` is appended verbatim to the directory that contains this
    module, so it must start with a path separator (e.g. '/data/modomics').
    The handle is returned open; closing it is left to the caller.
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    return open(module_dir + filepath, mode="r", encoding="utf-8")
def getModifications(local_mod):
    """Get the modification lookup table, preferring the Modomics API.

    Parameters:
        local_mod: when truthy, the API is not contacted and the local
            '/modifications' file next to this module is used instead.

    Returns:
        (modifications, fetch) where `fetch` is True only if the table was
        downloaded. In that case `modifications` is the decoded JSON payload;
        otherwise it is the open handle returned by openLocalModomics().
    """
    fetch = False
    if not local_mod:
        try:
            # Explicit (connect, read) timeout: without one, requests waits
            # forever on an unresponsive server and the local-file fallback
            # below is never reached. A timeout is raised as an exception and
            # handled by the generic handler.
            response = requests.get(
                "https://www.genesilico.pl/modomics/api/modifications",
                timeout=(10, 60),
            )
            response.raise_for_status()
            modifications = response.json()
            fetch = True
            log.info("Modification table retrieved...")
        except HTTPError as http_err:
            log.error("Unable to connect to Modomics database! HTTP error: {}. Check status of Modomics webpage. Using local Modomics files...".format(http_err))
            modifications = openLocalModomics('/modifications')
        except Exception as err:
            # Deliberately broad: any failure (network, timeout, bad JSON)
            # degrades to the bundled table rather than aborting the run.
            log.error("Error in connecting to Modomics: {}. Using local Modomics files...".format(err))
            modifications = openLocalModomics('/modifications')
    else:
        log.warning("Retrieval of Modomics database disabled. Using local files instead...")
        modifications = openLocalModomics('/modifications')
    return modifications, fetch
def modificationParser(modifications_table, fetch):
    """Build the modification lookup dictionary.

    When `fetch` is truthy, `modifications_table` is treated as a mapping whose
    values carry the keys "abbrev", "name", "short_name" and
    "reference_moiety" (first element used). Otherwise it is iterated as
    lines of a tab-separated table with the columns name, abbr, ref, mod;
    lines starting with '#' are ignored.

    Returns a dict keyed by the stripped modification symbol, each value
    being {'name': ..., 'abbr': ..., 'ref': ...}.
    """
    if fetch:
        log.info("Parsing Modification JSON data...")
        return {
            entry["abbrev"].strip(): {
                'name': entry["name"].strip(),
                'abbr': entry["short_name"].strip(),
                'ref': entry["reference_moiety"][0].strip(),
            }
            for entry in modifications_table.values()
        }

    log.info("Parsing local Modification data...")
    parsed = {}
    for record in modifications_table:
        if record.startswith("#"):
            continue
        name, abbr, ref, mod = record.split('\t')
        # unknown modifications get a reference of N
        if not ref or ref.isspace():
            ref = 'N'
        # rows without a modification symbol cannot be keyed, so drop them
        if not mod or mod.isspace():
            continue
        parsed[mod.strip()] = {'name': name.strip(), 'abbr': abbr.strip(), 'ref': ref.strip()}
    return parsed
def getUnmodSeq(seq, modification_table):
    """Translate a sequence of modified-base symbols into plain reference bases.

    Each symbol is looked up in `modification_table` and replaced by its
    'ref' entry. Two special cases apply: the insertion marker '_' is not in
    the table and becomes 'N', and a reference of 'preQ0base' (queuosine)
    becomes 'G'. Finally every 'U' in the result is rewritten as 'T'.

    Raises KeyError for a symbol that is neither '_' nor in the table.
    """
    def _reference_base(symbol):
        # insertions are not described in the modifications table
        if symbol == '_':
            return 'N'
        ref = modification_table[symbol]['ref']
        # queuosine is listed with reference preQ0base; map it to G
        return 'G' if ref == 'preQ0base' else ref

    unmodified = ''.join(_reference_base(symbol) for symbol in seq)
    return unmodified.replace('U', 'T')
def initIntronDict(tRNAscan_out):
    """Build a dictionary of intron locations from a tRNAscan output file.

    Lines starting with "Sequence", "Name" or "-" are treated as header lines
    and skipped. Every other line is split on whitespace; fields 0 and 1 form
    the ID '<field0>.trna<field1>', field 2 is the tRNA start and fields 6/7
    are the intron start/stop. Entries are only created when both intron
    bounds are greater than 0.

    Returns:
        dict mapping tRNA ID -> {'intron_start': int, 'intron_stop': int},
        with both values expressed relative to the tRNA start so they can be
        used directly as slice bounds on the tRNA sequence.
    """
    Intron_dict = {}
    intron_count = 0
    # Context manager ensures the file is closed even if a line fails to parse
    with open(tRNAscan_out, 'r') as tRNAscan:
        for line in tRNAscan:
            if line.startswith(("Sequence", "Name", "-")):
                continue
            fields = line.split()  # split once instead of once per column
            tRNA_ID = fields[0] + ".trna" + fields[1]
            tRNA_start = int(fields[2])
            intron_start = int(fields[6])
            intron_stop = int(fields[7])
            # if intron boundaries are not 0, i.e. there is an intron, then add to dict
            if intron_start > 0 and intron_stop > 0:
                intron_count += 1
                if tRNA_start > intron_start:  # tRNA is on reverse strand
                    intron_start = tRNA_start - intron_start
                    # +1 needed for python 0 indexing and correct slicing of intron
                    intron_stop = tRNA_start - intron_stop + 1
                else:  # tRNA is on forward strand
                    intron_start -= tRNA_start
                    intron_stop = intron_stop - tRNA_start + 1  # python 0 indexing
                Intron_dict[tRNA_ID] = {
                    'intron_start': intron_start,
                    'intron_stop': intron_stop,
                }
    log.info("{} introns registered...".format(intron_count))
    return Intron_dict
def intronRemover (Intron_dict, seqIO_dict, seqIO_record, posttrans_mod_off, double_cca):
    """Return the sequence of one record with its intron removed and ends processed.

    Parameters:
        Intron_dict: mapping of tRNA ID -> {'intron_start', 'intron_stop'}
            slice bounds.
        seqIO_dict: mapping of record name -> object exposing `.description`
            and a sliceable `.seq`.
        seqIO_record: key into `seqIO_dict`; also checked for the substring
            'His'.
        posttrans_mod_off: when equal to False, a 3' CCA and, for His
            records, a 5' G are added.
        double_cca: append 'CCACCA' instead of 'CCA'.

    Returns:
        The processed sequence as a str.

    Raises:
        AttributeError: if the description contains neither ID pattern.
    """
    record = seqIO_dict[seqIO_record]
    # Raw string: the pattern is unchanged, but "\)" and "\(" in a normal
    # string literal are invalid escape sequences that newer Python warns about.
    ID = re.search(r"tRNAscan-SE ID: (.*?)\).|\((chr.*?)-", record.description).groups()
    # Exactly one of the two alternatives captured; keep the non-empty group
    ID = list(filter(None, ID))[0]
    if ID in Intron_dict:
        bounds = Intron_dict[ID]
        seq = str(record.seq[:bounds['intron_start']] + record.seq[bounds['intron_stop']:])
    else:
        seq = str(record.seq)
    # Kept as an explicit comparison so that values such as None are not
    # treated like False.
    if posttrans_mod_off == False:
        seq = seq + ('CCACCA' if double_cca else 'CCA')
        # NOTE(review): source indentation was lost; the 5' G for His is
        # assumed to belong to the post-transcriptional branch - confirm.
        if 'His' in seqIO_record:
            seq = 'G' + seq
    return seq
def modsToSNPIndex(gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, experiment_name, out_dir, double_cca, threads, snp_tolerance = False, cluster = False, cluster_id = 0.95, posttrans_mod_off = False, pretrnas = False, local_mod = False, search='usearch'):
# Builds the SNP index needed for GSNAP based on modification data for each tRNA, and clusters tRNAs

@@ -256,3 +377,3 @@

# generate modomics_dict and tRNA_dict
tRNA_dict, modomics_dict, species = tRNAparser(gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, modifications_table, posttrans_mod_off, double_cca, pretrnas, local_mod)
tRNA_dict, modomics_dict, species = tRNAparser(gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, posttrans_mod_off, double_cca, pretrnas, local_mod)
temp_dir = out_dir + "/tmp/"

@@ -910,89 +1031,2 @@

def modificationParser(modifications_table):
    """Read the tab-separated modifications file and build a lookup dictionary.

    `modifications_table` is the path of a UTF-8 text file with the columns
    name, abbr, ref, mod. Lines starting with '#' are ignored, an empty or
    blank `ref` is replaced by 'N', and rows with an empty or blank `mod`
    are skipped.

    Returns a dict keyed by the stripped `mod` symbol, each value being
    {'name': ..., 'abbr': ..., 'ref': ...}.
    """
    modifications = {}
    # Context manager closes the table; previously the handle was leaked
    with open(modifications_table, 'r', encoding='utf-8') as mods:
        for line in mods:
            if line.startswith("#"):
                continue
            name, abbr, ref, mod = line.split('\t')
            # replace unknown modifications with reference of N
            if not ref or ref.isspace():
                ref = 'N'
            if mod and not mod.isspace():
                modifications[mod.strip()] = {'name': name.strip(), 'abbr': abbr.strip(), 'ref': ref.strip()}
    return modifications
def getUnmodSeq(seq, modification_table):
    """Convert modified-base symbols in `seq` to standard reference bases.

    Every symbol is replaced by the 'ref' value stored for it in
    `modification_table`, except that the insertion marker '_' (absent from
    the table) maps to 'N' and a 'preQ0base' reference (queuosine) maps to
    'G'. All 'U' characters in the joined result are then changed to 'T'.

    Raises KeyError for a symbol that is neither '_' nor in the table.
    """
    bases = []
    for symbol in seq:
        if symbol == '_':
            # insertion: no table entry exists, use N as the reference
            bases.append('N')
            continue
        ref = modification_table[symbol]['ref']
        # queuosine reference is recorded as preQ0base; treat it as G
        bases.append('G' if ref == 'preQ0base' else ref)
    return ''.join(bases).replace('U', 'T')
def initIntronDict(tRNAscan_out):
    """Build a dictionary of intron locations from a tRNAscan output file.

    Lines starting with "Sequence", "Name" or "-" are treated as header lines
    and skipped. Every other line is split on whitespace; fields 0 and 1 form
    the ID '<field0>.trna<field1>', field 2 is the tRNA start and fields 6/7
    are the intron start/stop. Entries are only created when both intron
    bounds are greater than 0.

    Returns:
        dict mapping tRNA ID -> {'intron_start': int, 'intron_stop': int},
        with both values expressed relative to the tRNA start so they can be
        used directly as slice bounds on the tRNA sequence.
    """
    Intron_dict = {}
    intron_count = 0
    # Context manager ensures the file is closed even if a line fails to parse
    with open(tRNAscan_out, 'r') as tRNAscan:
        for line in tRNAscan:
            if line.startswith(("Sequence", "Name", "-")):
                continue
            fields = line.split()  # split once instead of once per column
            tRNA_ID = fields[0] + ".trna" + fields[1]
            tRNA_start = int(fields[2])
            intron_start = int(fields[6])
            intron_stop = int(fields[7])
            # if intron boundaries are not 0, i.e. there is an intron, then add to dict
            if intron_start > 0 and intron_stop > 0:
                intron_count += 1
                if tRNA_start > intron_start:  # tRNA is on reverse strand
                    intron_start = tRNA_start - intron_start
                    # +1 needed for python 0 indexing and correct slicing of intron
                    intron_stop = tRNA_start - intron_stop + 1
                else:  # tRNA is on forward strand
                    intron_start -= tRNA_start
                    intron_stop = intron_stop - tRNA_start + 1  # python 0 indexing
                Intron_dict[tRNA_ID] = {
                    'intron_start': intron_start,
                    'intron_stop': intron_stop,
                }
    log.info("{} introns registered...".format(intron_count))
    return Intron_dict
def intronRemover (Intron_dict, seqIO_dict, seqIO_record, posttrans_mod_off, double_cca):
    """Return the sequence of one record with its intron removed and ends processed.

    Parameters:
        Intron_dict: mapping of tRNA ID -> {'intron_start', 'intron_stop'}
            slice bounds.
        seqIO_dict: mapping of record name -> object exposing `.description`
            and a sliceable `.seq`.
        seqIO_record: key into `seqIO_dict`; also checked for the substring
            'His'.
        posttrans_mod_off: when equal to False, a 3' CCA and, for His
            records, a 5' G are added.
        double_cca: append 'CCACCA' instead of 'CCA'.

    Returns:
        The processed sequence as a str.

    Raises:
        AttributeError: if the description contains neither ID pattern.
    """
    record = seqIO_dict[seqIO_record]
    # Raw string: the pattern is unchanged, but "\)" and "\(" in a normal
    # string literal are invalid escape sequences that newer Python warns about.
    ID = re.search(r"tRNAscan-SE ID: (.*?)\).|\((chr.*?)-", record.description).groups()
    # Exactly one of the two alternatives captured; keep the non-empty group
    ID = list(filter(None, ID))[0]
    if ID in Intron_dict:
        bounds = Intron_dict[ID]
        seq = str(record.seq[:bounds['intron_start']] + record.seq[bounds['intron_stop']:])
    else:
        seq = str(record.seq)
    # Kept as an explicit comparison so that values such as None are not
    # treated like False.
    if posttrans_mod_off == False:
        seq = seq + ('CCACCA' if double_cca else 'CCA')
        # NOTE(review): source indentation was lost; the 5' G for His is
        # assumed to belong to the post-transcriptional branch - confirm.
        if 'His' in seqIO_record:
            seq = 'G' + seq
    return seq
def countsAnticodon(input_counts, out_dir):

@@ -999,0 +1033,0 @@ # Counts per anticodon

@@ -1,1 +0,1 @@

__version__ = "v1.3.7"
__version__ = "v1.3.8"
Metadata-Version: 2.1
Name: mimseq
Version: 1.3.7
Version: 1.3.8
Summary: Custom high-throughput tRNA sequencing alignment and quantification pipeline based on modification induced misincorporation cDNA synthesis.

@@ -5,0 +5,0 @@ Home-page: https://github.com/nedialkova-lab/mim-tRNAseq

@@ -39,8 +39,6 @@ <p align="center">

To use mim-tRNAseq, it is recommended to install the package using `conda`, preferably in its own environment. Significant time and dependency-related improvements can be made to using conda for managing environment and installing mimseq using the [Mambaforge](https://github.com/conda-forge/miniforge) version of conda Miniforge. We recommend installing Mambaforge and then followin the steps below:
To use mim-tRNAseq, it is recommended to install the package using `conda`, preferably in its own environment. Environment management and installation of mimseq are significantly faster, with fewer dependency problems, when using the [Miniforge](https://github.com/conda-forge/miniforge) distribution of conda, which includes optional use of Mamba. We recommend installing Miniforge and then following the steps below:
```bash
conda create -n mimseq python=3.7
conda activate mimseq
conda config --add channels conda-forge
conda install -c conda-forge mamba
mamba install -c bioconda mimseq

@@ -62,3 +60,3 @@ ```

Alternatively, mim-tRNAseq can be installed with `pip`, in which case all additional non-python package dependencies (see documentation) will also need to be installed.
Alternatively, mim-tRNAseq can be installed with `pip`, in which case all additional non-python package dependencies (including `usearch` as above, `BLAST`, `infernal`, `GMAP/GSNAP`, and all required R packages) will also need to be installed manually.
```bash

@@ -65,0 +63,0 @@ pip install mimseq

@@ -51,2 +51,3 @@ #!/usr/bin/env python

"pybedtools",
"requests",
"statsmodels"],

@@ -53,0 +54,0 @@ classifiers=[

Sorry, the diff of this file is too big to display