Metadata-Version: 2.1
		Name: mimseq
		Version: 1.3.7
		Version: 1.3.8
		Summary: Custom high-throughput tRNA sequencing alignment and quantification pipeline based on modification induced misincorporation cDNA synthesis.
		@@ -5,0 +5,0 @@ Home-page: https://github.com/nedialkova-lab/mim-tRNAseq

+1

-0

mimseq.egg-info/requires.txt

		@@ -9,2 +9,3 @@ biopython
		pybedtools
		requests
		statsmodels

+0

-1

mimseq.egg-info/SOURCES.txt

		@@ -32,3 +32,2 @@ LICENSE.txt
		mimseq/data/modomics
		mimseq/data/modomics_orig
		mimseq/data/tRNAmatureseq.cm
		@@ -35,0 +34,0 @@ mimseq/data/araTha1-eColitK/FastaHeadersforMimseq.py

+1

-4

mimseq/mimseq.py

		@@ -97,8 +97,5 @@ #! /usr/bin/env python3
		map_round = 1 #first round of mapping

		# Parse tRNA and modifications, generate SNP index
		modifications = os.path.dirname(os.path.realpath(__file__))
		modifications += "/modifications"
		coverage_bed, snp_tolerance, mismatch_dict, insert_dict, del_dict, mod_lists, Inosine_lists, Inosine_clusters, tRNA_dict, cluster_dict, cluster_perPos_mismatchMembers \
		= modsToSNPIndex(trnas, trnaout, mito_trnas, plastid_trnas, modifications, name, out, double_cca, threads, snp_tolerance, cluster, cluster_id, posttrans, pretrnas, local_mod)
		= modsToSNPIndex(gtRNAdb = trnas, tRNAscan_out = trnaout, mitotRNAs = mito_trnas, plastidtRNAs = plastid_trnas, experiment_name = name, out_dir = out, double_cca = double_cca, threads = threads, snp_tolerance = snp_tolerance, cluster = cluster, cluster_id = cluster_id, posttrans_mod_off = posttrans, pretrnas = pretrnas, local_mod = local_mod)
		structureParser()
		@@ -105,0 +102,0 @@ # Generate GSNAP indices

+132

-98

mimseq/tRNAtools.py

		@@ -31,7 +31,8 @@ #!/usr/bin/env python3

		def tRNAparser (gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, modifications_table, posttrans_mod_off, double_cca, pretrnas, local_mod):
		def tRNAparser (gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, posttrans_mod_off, double_cca, pretrnas, local_mod):
		# tRNA sequence files parser and dictionary building

		# Generate modification reference table
		modifications = modificationParser(modifications_table)
		modifications_file, fetch = getModifications(local_mod)
		modifications = modificationParser(modifications_file, fetch)
		temp_name = gtRNAdb.split("/")[-1]
		@@ -218,3 +219,3 @@
		def getModomics(local_mod):
		# Get full Modomics modified tRNA data from web
		# Get full Modomics modified tRNA data from API
		fetch = False
		@@ -230,16 +231,136 @@ if not local_mod:
		log.error("Unable to connect to Modomics database! HTTP error: {}. Check status of Modomics webpage. Using local Modomics files...".format(http_err))
		modomics_path = os.path.dirname(os.path.realpath(__file__)) + '/data/modomics'
		modomics = open(modomics_path, "r+", encoding = "utf-8")
		modomics = openLocalModomics('/data/modomics')
		except Exception as err:
		log.error("Error in connecting to Modomics: {}. Using local Modomics files...".format(err))
		modomics_path = os.path.dirname(os.path.realpath(__file__)) + '/data/modomics'
		modomics = open(modomics_path, "r+", encoding = "utf-8")
		modomics = openLocalModomics('/data/modomics')
		else:
		log.warning("Retrieval of Modomics database disabled. Using local files instead...")
		modomics_path = os.path.dirname(os.path.realpath(__file__)) + '/data/modomics'
		modomics = open(modomics_path, "r+", encoding = "utf-8")
		modomics = openLocalModomics('/data/modomics')

		return modomics, fetch

		def modsToSNPIndex(gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, modifications_table, experiment_name, out_dir, double_cca, threads, snp_tolerance = False, cluster = False, cluster_id = 0.95, posttrans_mod_off = False, pretrnas = False, local_mod = False, search='usearch'):
		def openLocalModomics(filepath):
			# Open a data file shipped alongside this module for reading
			# filepath is appended as-is to the directory containing this module,
			# so it must start with a path separator (e.g. '/data/modomics')
			# Returns the open UTF-8 text file object; closing it is left to the caller
			package_dir = os.path.dirname(os.path.realpath(__file__))
			return open(package_dir + filepath, "r", encoding = "utf-8")

		def getModifications(local_mod, timeout = 60):
			# Get modification lookup table from Modomics via API, falling back to the packaged file
			#
			# local_mod: if true, skip the API request and use the local modifications file
			# timeout: seconds passed to requests.get; without a timeout an unresponsive
			#   server would block the pipeline indefinitely instead of triggering the fallback
			#
			# Returns (modifications, fetch):
			#   fetch is True  -> modifications is the decoded JSON response of the API
			#   fetch is False -> modifications is the open file object returned by openLocalModomics
			fetch = False
			modifications = None
			if local_mod:
				log.warning("Retrieval of Modomics database disabled. Using local files instead...")
			else:
				try:
					response = requests.get("https://www.genesilico.pl/modomics/api/modifications", timeout = timeout)
					response.raise_for_status()
					modifications = response.json()
					fetch = True
					log.info("Modification table retrieved...")
				except HTTPError as http_err:
					log.error("Unable to connect to Modomics database! HTTP error: {}. Check status of Modomics webpage. Using local Modomics files...".format(http_err))
				except Exception as err:
					# deliberately broad: any failure (connection, timeout, invalid JSON) falls back to local data
					log.error("Error in connecting to Modomics: {}. Using local Modomics files...".format(err))

			# single fallback point for both the disabled and the failed retrieval case
			if not fetch:
				modifications = openLocalModomics('/modifications')

			return modifications, fetch

		def modificationParser(modifications_table, fetch):
			# Read in modifications and build dictionary keyed by modification symbol
			#
			# modifications_table: when fetch is true, the decoded JSON from the API (a dict whose
			#   values are records with "abbrev", "name", "short_name" and "reference_moiety");
			#   otherwise an iterable of tab-separated lines with the columns name, abbr, ref, mod
			# fetch: whether modifications_table came from the API
			#
			# Returns {symbol: {'name': ..., 'abbr': ..., 'ref': ...}}
			# Raises ValueError if a non-comment, non-blank local line does not have exactly 4 columns

			modifications = {}

			if fetch:
				log.info("Parsing Modification JSON data...")
				for data in modifications_table.values():
					mod = data["abbrev"].strip()
					moieties = data["reference_moiety"]
					# an empty reference list gets reference N, as in the local branch below
					ref = moieties[0].strip() if moieties else ''
					# entries without a symbol are skipped, as in the local branch below
					if mod:
						modifications[mod] = {'name':data["name"].strip(), 'abbr':data["short_name"].strip(), 'ref':ref or 'N'}

			else:
				log.info("Parsing local Modification data...")
				for line in modifications_table:
					# skip comments and blank lines (a blank line would otherwise fail the 4-column unpacking)
					if line.startswith("#") or not line.strip():
						continue
					name, abbr, ref, mod = line.split('\t')
					# replace unknown modifications with reference of N
					ref = ref.strip() or 'N'
					mod = mod.strip()
					if mod:
						modifications[mod] = {'name':name.strip(), 'abbr':abbr.strip(), 'ref':ref}

			return(modifications)

		def getUnmodSeq(seq, modification_table):
			# Translate a sequence of modification symbols into its unmodified DNA-alphabet reference
			# seq: iterable of single-character symbols
			# modification_table: {symbol: {'ref': reference base, ...}}; a symbol other than '_'
			#   that is missing from the table raises KeyError
			# Returns the reference sequence as a string with U written as T

			def reference_of(symbol):
				# insertions ('_') are not described in the modifications table and become N
				if symbol == '_':
					return 'N'
				base = modification_table[symbol]['ref']
				# queuosine has preQ0base as reference in the modification file; use G instead
				return 'G' if base == 'preQ0base' else base

			unmodified = ''.join(reference_of(symbol) for symbol in seq)
			return(unmodified.replace('U','T'))

		def initIntronDict(tRNAscan_out):
			# Build dictionary of intron locations
			#
			# tRNAscan_out: path to a whitespace-separated table; lines starting with
			#   "Sequence", "Name" or "-" are treated as header and skipped. Columns used:
			#   0 = sequence name, 1 = tRNA number, 2 = tRNA start, 6 = intron start, 7 = intron stop
			#
			# Returns {"<name>.trna<number>": {'intron_start': int, 'intron_stop': int}} holding
			# slice offsets relative to the tRNA start, only for tRNAs whose intron bounds are both > 0
			# Raises IndexError / ValueError on data lines with too few or non-integer columns

			Intron_dict = {}
			intron_count = 0
			# context manager guarantees the file is closed, also when a malformed line raises
			with open(tRNAscan_out, 'r') as tRNAscan:
				for line in tRNAscan:
					if line.startswith(("Sequence", "Name", "-")):
						continue
					# split once per line; blank lines carry no record
					fields = line.split()
					if not fields:
						continue
					tRNA_ID = fields[0] + ".trna" + fields[1]
					tRNA_start = int(fields[2])
					intron_start = int(fields[6])
					intron_stop = int(fields[7])
					# if intron boundaries are 0 there is no intron and nothing is added to dict
					if intron_start <= 0 or intron_stop <= 0:
						continue
					if tRNA_start > intron_start: # tRNA is on reverse strand
						intron_start = tRNA_start - intron_start
						intron_stop = tRNA_start - intron_stop + 1 # needed for python 0 indexing and correct slicing of intron
					else: # tRNA is on forward strand
						intron_start -= tRNA_start
						intron_stop = intron_stop - tRNA_start + 1 # python 0 indexing
					intron_count += 1
					Intron_dict[tRNA_ID] = {'intron_start': intron_start, 'intron_stop': intron_stop}

			log.info("{} introns registered...".format(intron_count))
			return(Intron_dict)


		def intronRemover (Intron_dict, seqIO_dict, seqIO_record, posttrans_mod_off, double_cca):
			# Use Intron_dict to find and remove introns plus add CCA and 5' G for His (if eukaryotic)
			#
			# Intron_dict: {tRNAscan-SE ID: {'intron_start': int, 'intron_stop': int}}
			# seqIO_dict: mapping of record name to an object with .description and .seq
			# seqIO_record: key into seqIO_dict; also checked for 'His' to decide on the 5' G
			# posttrans_mod_off: if true, neither the 3' CCA nor the 5' G for His is added
			# double_cca: add 'CCACCA' instead of 'CCA'
			#
			# Returns the processed sequence as a string
			# Raises AttributeError if the description contains no recognisable tRNAscan-SE ID

			record = seqIO_dict[seqIO_record]
			# Find a match, slice intron and add G and CCA
			# Two header layouts are accepted: "tRNAscan-SE ID: <ID>)" or "(<chr...ID>-"; only one
			# of the two groups matches, the other is None and is filtered out below
			ID = re.search(r"tRNAscan-SE ID: (.*?)\).|\((chr.*?)-", record.description).groups()
			ID = list(filter(None, ID))[0]
			if ID in Intron_dict:
				intron = Intron_dict[ID]
				seq = str(record.seq[:intron['intron_start']] + record.seq[intron['intron_stop']:])
			else:
				seq = str(record.seq)
			if not posttrans_mod_off:
				seq += 'CCACCA' if double_cca else 'CCA'
				if 'His' in seqIO_record:
					seq = 'G' + seq

			return(seq)

		def modsToSNPIndex(gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, experiment_name, out_dir, double_cca, threads, snp_tolerance = False, cluster = False, cluster_id = 0.95, posttrans_mod_off = False, pretrnas = False, local_mod = False, search='usearch'):
		# Builds SNP index needed for GSNAP based on modification data for each tRNA and clusters tRNAs
		@@ -256,3 +377,3 @@
		# generate modomics_dict and tRNA_dict
		tRNA_dict, modomics_dict, species = tRNAparser(gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, modifications_table, posttrans_mod_off, double_cca, pretrnas, local_mod)
		tRNA_dict, modomics_dict, species = tRNAparser(gtRNAdb, tRNAscan_out, mitotRNAs, plastidtRNAs, posttrans_mod_off, double_cca, pretrnas, local_mod)
		temp_dir = out_dir + "/tmp/"
		@@ -910,89 +1031,2 @@

		def modificationParser(modifications_table):
			# Read in modifications and build dictionary keyed by modification symbol
			#
			# modifications_table: path to a UTF-8 tab-separated file with the columns
			#   name, abbr, ref, mod; lines starting with "#" are comments
			#
			# Returns {symbol: {'name': ..., 'abbr': ..., 'ref': ...}}
			# Raises ValueError if a non-comment, non-blank line does not have exactly 4 columns

			modifications = {}
			# context manager so the file handle is released once parsing finishes or fails
			with open(modifications_table, 'r', encoding='utf-8') as mods:
				for line in mods:
					# skip comments and blank lines (a blank line would otherwise fail the 4-column unpacking)
					if line.startswith("#") or not line.strip():
						continue
					name, abbr, ref, mod = line.split('\t')
					# replace unknown modifications with reference of N
					ref = ref.strip() or 'N'
					mod = mod.strip()
					if mod:
						modifications[mod] = {'name':name.strip(), 'abbr':abbr.strip(), 'ref':ref}
			return(modifications)

		def getUnmodSeq(seq, modification_table):
			# Translate a sequence of modification symbols into its unmodified DNA-alphabet reference
			# seq: iterable of single-character symbols
			# modification_table: {symbol: {'ref': reference base, ...}}; a symbol other than '_'
			#   that is missing from the table raises KeyError
			# Returns the reference sequence as a string with U written as T

			def reference_of(symbol):
				# insertions ('_') are not described in the modifications table and become N
				if symbol == '_':
					return 'N'
				base = modification_table[symbol]['ref']
				# queuosine has preQ0base as reference in the modification file; use G instead
				return 'G' if base == 'preQ0base' else base

			unmodified = ''.join(reference_of(symbol) for symbol in seq)
			return(unmodified.replace('U','T'))

		def initIntronDict(tRNAscan_out):
			# Build dictionary of intron locations
			#
			# tRNAscan_out: path to a whitespace-separated table; lines starting with
			#   "Sequence", "Name" or "-" are treated as header and skipped. Columns used:
			#   0 = sequence name, 1 = tRNA number, 2 = tRNA start, 6 = intron start, 7 = intron stop
			#
			# Returns {"<name>.trna<number>": {'intron_start': int, 'intron_stop': int}} holding
			# slice offsets relative to the tRNA start, only for tRNAs whose intron bounds are both > 0
			# Raises IndexError / ValueError on data lines with too few or non-integer columns

			Intron_dict = {}
			intron_count = 0
			# context manager guarantees the file is closed, also when a malformed line raises
			with open(tRNAscan_out, 'r') as tRNAscan:
				for line in tRNAscan:
					if line.startswith(("Sequence", "Name", "-")):
						continue
					# split once per line; blank lines carry no record
					fields = line.split()
					if not fields:
						continue
					tRNA_ID = fields[0] + ".trna" + fields[1]
					tRNA_start = int(fields[2])
					intron_start = int(fields[6])
					intron_stop = int(fields[7])
					# if intron boundaries are 0 there is no intron and nothing is added to dict
					if intron_start <= 0 or intron_stop <= 0:
						continue
					if tRNA_start > intron_start: # tRNA is on reverse strand
						intron_start = tRNA_start - intron_start
						intron_stop = tRNA_start - intron_stop + 1 # needed for python 0 indexing and correct slicing of intron
					else: # tRNA is on forward strand
						intron_start -= tRNA_start
						intron_stop = intron_stop - tRNA_start + 1 # python 0 indexing
					intron_count += 1
					Intron_dict[tRNA_ID] = {'intron_start': intron_start, 'intron_stop': intron_stop}

			log.info("{} introns registered...".format(intron_count))
			return(Intron_dict)


		def intronRemover (Intron_dict, seqIO_dict, seqIO_record, posttrans_mod_off, double_cca):
			# Use Intron_dict to find and remove introns plus add CCA and 5' G for His (if eukaryotic)
			#
			# Intron_dict: {tRNAscan-SE ID: {'intron_start': int, 'intron_stop': int}}
			# seqIO_dict: mapping of record name to an object with .description and .seq
			# seqIO_record: key into seqIO_dict; also checked for 'His' to decide on the 5' G
			# posttrans_mod_off: if true, neither the 3' CCA nor the 5' G for His is added
			# double_cca: add 'CCACCA' instead of 'CCA'
			#
			# Returns the processed sequence as a string
			# Raises AttributeError if the description contains no recognisable tRNAscan-SE ID

			record = seqIO_dict[seqIO_record]
			# Find a match, slice intron and add G and CCA
			# Two header layouts are accepted: "tRNAscan-SE ID: <ID>)" or "(<chr...ID>-"; only one
			# of the two groups matches, the other is None and is filtered out below
			ID = re.search(r"tRNAscan-SE ID: (.*?)\).|\((chr.*?)-", record.description).groups()
			ID = list(filter(None, ID))[0]
			if ID in Intron_dict:
				intron = Intron_dict[ID]
				seq = str(record.seq[:intron['intron_start']] + record.seq[intron['intron_stop']:])
			else:
				seq = str(record.seq)
			if not posttrans_mod_off:
				seq += 'CCACCA' if double_cca else 'CCA'
				if 'His' in seqIO_record:
					seq = 'G' + seq

			return(seq)

		def countsAnticodon(input_counts, out_dir):
		@@ -999,0 +1033,0 @@ # Counts per anticodon

+1

-1

mimseq/version.py

		@@ -1,1 +0,1 @@
		__version__ = "v1.3.7"
		__version__ = "v1.3.8"

+1

-1

PKG-INFO

		Metadata-Version: 2.1
		Name: mimseq
		Version: 1.3.7
		Version: 1.3.8
		Summary: Custom high-throughput tRNA sequencing alignment and quantification pipeline based on modification induced misincorporation cDNA synthesis.
		@@ -5,0 +5,0 @@ Home-page: https://github.com/nedialkova-lab/mim-tRNAseq

+2

-4

README.md

		@@ -39,8 +39,6 @@ <p align="center">

		To use mim-tRNAseq, it is recommended to install the package using `conda`, preferably in its own environment. Significant time and dependency-related improvements can be made when using conda to manage environments and install mimseq by using the [Mambaforge](https://github.com/conda-forge/miniforge) version of conda Miniforge. We recommend installing Mambaforge and then following the steps below:
		To use mim-tRNAseq, it is recommended to install the package using `conda`, preferably in its own environment. Significant time and dependency-related improvements can be made when using conda to manage environments and install mimseq by using the [Miniforge](https://github.com/conda-forge/miniforge) version of conda, which includes optional use of Mamba. We recommend installing Miniforge and then following the steps below:
		```bash
		conda create -n mimseq python=3.7
		conda activate mimseq
		conda config --add channels conda-forge
		conda install -c conda-forge mamba
		mamba install -c bioconda mimseq
		@@ -62,3 +60,3 @@ ```

		Alternatively, mim-tRNAseq can be installed with `pip`, in which case all additional non-python package dependencies (see documentation) will also need to be installed.
		Alternatively, mim-tRNAseq can be installed with `pip`, in which case all additional non-python package dependencies (including `usearch` as above, `BLAST`, `infernal`, `GMAP/GSNAP`, and all required R packages) will also need to be installed manually.
		```bash
		@@ -65,0 +63,0 @@ pip install mimseq

+1

-0

setup.py

		@@ -51,2 +51,3 @@ #!/usr/bin/env python
		"pybedtools",
		"requests",
		"statsmodels"],
		@@ -53,0 +54,0 @@ classifiers=[

mimseq/data/modomics_orig

Sorry, the diff of this file is too big to display

mimseq - npm Package Compare versions

Improved metrics

Worsened metrics