gumpy
Advanced tools
| Metadata-Version: 2.1 | ||
| Name: gumpy | ||
| Version: 1.2.7 | ||
| Version: 1.3.0 | ||
| Summary: Genetics with Numpy | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/oxfordmmm/gumpy |
+72
-18
@@ -40,3 +40,5 @@ """ | ||
| def __init__(self, record: pysam.libcbcf.VariantRecord, sample: str | int): | ||
| def __init__( | ||
| self, record: pysam.libcbcf.VariantRecord, sample: str | int, min_dp: int | None | ||
| ): | ||
| """Constructor for the VCFRecord object. | ||
@@ -50,2 +52,3 @@ | ||
| where there is more than 1 sample per record | ||
| min_dp (int | None): Minimum depth to consider a call. | ||
| """ | ||
@@ -101,9 +104,19 @@ | ||
| if key == "GT": | ||
| call1, call2 = item | ||
| self.call1 = call1 if call1 is not None else -1 | ||
| self.call2 = self.call1 if call2 is None else call2 | ||
| if len(item) == 2: | ||
| # Ploidy 2 is expected here | ||
| call1, call2 = item | ||
| self.call1 = call1 if call1 is not None else -1 | ||
| self.call2 = self.call1 if call2 is None else call2 | ||
| else: | ||
| # GVCF edge case with ploidy 1 | ||
| call1 = item[0] | ||
| self.call1 = call1 if call1 is not None else -1 | ||
| self.call2 = self.call1 | ||
| self.is_reference = ( | ||
| True if call1 == 0 and (call1 == call2 or call2 == -1) else False | ||
| True | ||
| if self.call1 == 0 | ||
| and (self.call1 == self.call2 or self.call2 == -1) | ||
| else False | ||
| ) | ||
| self.is_heterozygous = True if call1 != call2 else False | ||
| self.is_heterozygous = True if self.call1 != self.call2 else False | ||
| self.is_null = True if set([self.call1, self.call2]) == {-1} else False | ||
@@ -125,2 +138,27 @@ self.is_alt = ( | ||
| if min_dp is not None: | ||
| allelic_depth_tag = "COV" if "COV" in self.values.keys() else "AD" | ||
| # Ensure we have a COV tag for downstream analysis | ||
| self.values["COV"] = self.values[allelic_depth_tag] | ||
| print(self.values[allelic_depth_tag]) | ||
| if self.values[allelic_depth_tag] != (None,): | ||
| # If the depth given is below the threshold, | ||
| # this row is a null call's row from the GVCF | ||
| # So treat it as such. | ||
| if len(self.values[allelic_depth_tag]) == 1: | ||
| if self.values[allelic_depth_tag][0] < min_dp: | ||
| # Odd case where GVCF only gives the depth of the alt | ||
| self.is_null = True | ||
| self.is_heterozygous = False | ||
| self.is_alt = False | ||
| self.is_reference = False | ||
| elif ( | ||
| self.values[allelic_depth_tag][self.call1] < min_dp | ||
| or self.values[allelic_depth_tag][self.call2] < min_dp | ||
| ): | ||
| self.is_null = True | ||
| self.is_heterozygous = False | ||
| self.is_alt = False | ||
| self.is_reference = False | ||
| def __repr__(self) -> str: | ||
@@ -203,2 +241,3 @@ """Pretty print the record | ||
| minor_population_indices: Collection[int] = [], | ||
| min_dp: int | None = None, | ||
| ): | ||
@@ -221,2 +260,3 @@ """ | ||
| within which to look for minor populations | ||
| min_dp (int, optional): Minimum depth to consider a call. Default is None | ||
| """ | ||
@@ -293,3 +333,3 @@ | ||
| for sample in record.samples.keys(): | ||
| self.records.append(VCFRecord(record, sample)) | ||
| self.records.append(VCFRecord(record, sample, min_dp)) | ||
@@ -310,3 +350,6 @@ # Find calls will ensure that no calls have same position | ||
| or "AD" in self.format_fields_metadata.keys() | ||
| ), "'COV' and 'AD' not in VCF format fields. No minor populations can be found!" | ||
| ), ( | ||
| "'COV' and 'AD' not in VCF format fields. " | ||
| "No minor populations can be found!" | ||
| ) | ||
| self._find_minor_populations() | ||
@@ -390,6 +433,2 @@ else: | ||
| allelic_depth_tag = ( | ||
| "COV" if "COV" in self.format_fields_metadata.keys() else "AD" | ||
| ) | ||
| for idx, type_ in self.calls.keys(): | ||
@@ -402,2 +441,10 @@ # Check if we've dealt with this vcf already | ||
| # Pull out depth tag from the specific row's format fields | ||
| # as the file metadata isn't a guarantee of the actual fields of this row | ||
| allelic_depth_tag = ( | ||
| "COV" | ||
| if self.calls[(idx, type_)]["original_vcf_row"].get("COV", None) | ||
| else "AD" | ||
| ) | ||
| # Checking for het calls | ||
@@ -418,6 +465,10 @@ if self.calls[(idx, type_)]["call"] == "z": | ||
| # Get all of the calls | ||
| calls = [self.calls[(idx, type_)]["original_vcf_row"]["REF"]] + list( | ||
| self.calls[(idx, type_)]["original_vcf_row"]["ALTS"] | ||
| ) | ||
| if self.calls[(idx, type_)]["original_vcf_row"]["ALTS"] is None: | ||
| # Case arises from gvcf ref calls not giving any alts | ||
| calls = [self.calls[(idx, type_)]["original_vcf_row"]["REF"]] | ||
| else: | ||
| # Get all of the calls | ||
| calls = [self.calls[(idx, type_)]["original_vcf_row"]["REF"]] + list( | ||
| self.calls[(idx, type_)]["original_vcf_row"]["ALTS"] | ||
| ) | ||
@@ -430,4 +481,7 @@ # Break down the calls as appropriate | ||
| # total_depth = self.calls[(idx, type_)]['original_vcf_row']['DP'] | ||
| total_depth = sum(dps) | ||
| # GVCF null calls get None for depth, so catch (and skip) this | ||
| if dps == [None]: | ||
| continue | ||
| else: | ||
| total_depth = sum(dps) | ||
@@ -434,0 +488,0 @@ # idx here refers to the position of this call, NOT this vcf row, so adjust |
+1
-1
| Metadata-Version: 2.1 | ||
| Name: gumpy | ||
| Version: 1.2.7 | ||
| Version: 1.3.0 | ||
| Summary: Genetics with Numpy | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/oxfordmmm/gumpy |
+2
-1
@@ -1,1 +0,2 @@ | ||
| 1.2.7 | ||
| 1.3.0 | ||
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
207504
1.22%4054
1.32%