aiondata
Advanced tools
+80
-10
@@ -0,1 +1,2 @@ | ||
| from collections import OrderedDict | ||
| import io | ||
@@ -10,2 +11,3 @@ from typing import Optional, Generator, Union, Tuple | ||
| from .datasets import GeneratedDataset | ||
| import polars as pl | ||
@@ -21,12 +23,65 @@ | ||
| COLLECTION = "bindingdb" | ||
| SCHEMA = [ | ||
| # Primary Identifiers | ||
| ("BindingDB Reactant_set_id", pl.Float64), | ||
| ("BindingDB MonomerID", pl.Float64), | ||
| ("BindingDB Entry DOI", pl.Utf8), | ||
| # Target-related Fields | ||
| ("Target Name", pl.Utf8), | ||
| ("UniProt (SwissProt) Primary ID of Target Chain", pl.Utf8), | ||
| ("UniProt (SwissProt) Secondary ID(s) of Target Chain", pl.Utf8), | ||
| ("UniProt (SwissProt) Alternative ID(s) of Target Chain", pl.Utf8), | ||
| ("UniProt (SwissProt) Recommended Name of Target Chain", pl.Utf8), | ||
| ("UniProt (SwissProt) Entry Name of Target Chain", pl.Utf8), | ||
| ("UniProt (TrEMBL) Primary ID of Target Chain", pl.Utf8), | ||
| ("UniProt (TrEMBL) Secondary ID(s) of Target Chain", pl.Utf8), | ||
| ("UniProt (TrEMBL) Alternative ID(s) of Target Chain", pl.Utf8), | ||
| ("UniProt (TrEMBL) Submitted Name of Target Chain", pl.Utf8), | ||
| ("UniProt (TrEMBL) Entry Name of Target Chain", pl.Utf8), | ||
| ("Target Source Organism According to Curator or DataSource", pl.Utf8), | ||
| ("BindingDB Target Chain Sequence", pl.Utf8), | ||
| # Ligand-related Fields | ||
| ("Ligand InChI", pl.Utf8), | ||
| ("Ligand InChI Key", pl.Utf8), | ||
| ("BindingDB Ligand Name", pl.Utf8), | ||
| ("SMILES", pl.Utf8), | ||
| ("PubChem CID of Ligand", pl.Float64), | ||
| ("PubChem SID of Ligand", pl.Float64), | ||
| ("ChEBI ID of Ligand", pl.Utf8), | ||
| ("ChEMBL ID of Ligand", pl.Utf8), | ||
| ("DrugBank ID of Ligand", pl.Utf8), | ||
| ("IUPHAR_GRAC ID of Ligand", pl.Utf8), | ||
| ("KEGG ID of Ligand", pl.Utf8), | ||
| ("ZINC ID of Ligand", pl.Utf8), | ||
| ("Ligand HET ID in PDB", pl.Utf8), | ||
| ("PDB ID(s) for Ligand-Target Complex", pl.Utf8), | ||
| ("PDB ID(s) of Target Chain", pl.Utf8), | ||
| # Interaction and Binding Affinity Measurements | ||
| ("Ki (nM)", pl.Float64), | ||
| ("IC50 (nM)", pl.Float64), | ||
| ("Kd (nM)", pl.Float64), | ||
| ("EC50 (nM)", pl.Float64), | ||
| ("kon (M-1-s-1)", pl.Float64), | ||
| ("koff (s-1)", pl.Float64), | ||
| # Experimental Conditions | ||
| ("pH", pl.Float64), | ||
| ("Temp C", pl.Float64), | ||
| # Miscellaneous | ||
| ( | ||
| "Number of Protein Chains in Target (bigger than 1 implies a multichain complex)", | ||
| pl.Float64, | ||
| ), | ||
| # Links and References | ||
| ("Link to Ligand in BindingDB", pl.Utf8), | ||
| ("Link to Ligand-Target Pair in BindingDB", pl.Utf8), | ||
| ("From", pl.Utf8), | ||
| ("Curation/DataSource", pl.Utf8), | ||
| ("Article DOI", pl.Utf8), | ||
| ("PMID", pl.Utf8), | ||
| ("PubChem AID", pl.Utf8), | ||
| ("Patent Number", pl.Utf8), | ||
| ("Authors", pl.Utf8), | ||
| ("Institution", pl.Utf8), | ||
| ] | ||
| float_fields = { | ||
| "Ki (nM)", | ||
| "IC50 (nM)", | ||
| "Kd (nM)", | ||
| "EC50 (nM)", | ||
| "kon (M-1-s-1)", | ||
| "koff (s-1)", | ||
| } | ||
| def __init__(self, fd: Optional[io.BufferedReader] = None): | ||
@@ -62,3 +117,18 @@ """ | ||
| """ | ||
| if prop_name in BindingDB.float_fields: | ||
| float_fields = { | ||
| name | ||
| for name, dtype in self.SCHEMA | ||
| if isinstance(dtype, (pl.Float64, pl.Float32)) | ||
| } | ||
| if value == "": | ||
| return None | ||
| # Fudge numbers that are greater or less than a value | ||
| if value[0] == ">": | ||
| return float(value[1:]) * 1.01 | ||
| if value[0] == "<": | ||
| return float(value[1:]) * 0.99 | ||
| if "NV" in value: | ||
| return None | ||
| if prop_name in float_fields: | ||
| try: | ||
@@ -65,0 +135,0 @@ return float(value) |
@@ -68,2 +68,7 @@ import os | ||
| def get_df(self) -> pl.DataFrame: | ||
| return pl.DataFrame(self.to_generator()) | ||
| if self.SCHEMA is None: | ||
| return pl.DataFrame( | ||
| self.to_generator(), infer_schema_length=25000, strict=False | ||
| ) | ||
| else: | ||
| return pl.DataFrame(self.to_generator(), schema=self.SCHEMA, strict=False) |
+1
-1
| Metadata-Version: 2.1 | ||
| Name: aiondata | ||
| Version: 0.4.5 | ||
| Version: 0.4.6 | ||
| Summary: A common data access layer for AI-driven drug discovery. | ||
@@ -5,0 +5,0 @@ Home-page: https://www.github.com/aion-labs/aiondata |
+1
-1
| [tool.poetry] | ||
| name = "aiondata" | ||
| version = "0.4.5" | ||
| version = "0.4.6" | ||
| description = "A common data access layer for AI-driven drug discovery." | ||
@@ -5,0 +5,0 @@ authors = ["JJ Ben-Joseph <jj@tensorspace.ai>"] |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
53884
6.44%655
12.93%