fastpdb
Advanced tools
| name: Builds | ||
| on: | ||
| release: | ||
| types: [published] | ||
| workflow_dispatch: {} | ||
| jobs: | ||
| build: | ||
| name: Building distribution | ||
| strategy: | ||
| matrix: | ||
| include: | ||
| - os: ubuntu-latest | ||
| python: '3.7' | ||
| numpy: '1.15' | ||
| source: false | ||
| - os: ubuntu-latest | ||
| python: '3.8' | ||
| numpy: '1.16' | ||
| source: false | ||
| - os: ubuntu-latest | ||
| python: '3.9' | ||
| numpy: '1.19' | ||
| source: true | ||
| - os: macos-latest | ||
| python: '3.7' | ||
| numpy: '1.15' | ||
| source: false | ||
| - os: macos-latest | ||
| python: '3.8' | ||
| numpy: '1.16' | ||
| source: false | ||
| - os: macos-latest | ||
| python: '3.9' | ||
| numpy: '1.19' | ||
| source: false | ||
| - os: windows-latest | ||
| python: '3.7' | ||
| numpy: '1.15' | ||
| source: false | ||
| - os: windows-latest | ||
| python: '3.8' | ||
| numpy: '1.16' | ||
| source: false | ||
| - os: windows-latest | ||
| python: '3.9' | ||
| numpy: '1.19' | ||
| source: false | ||
| runs-on: ${{ matrix.os }} | ||
| defaults: | ||
| run: | ||
| shell: bash -l {0} | ||
| steps: | ||
| - uses: actions/checkout@v2 | ||
| - uses: conda-incubator/setup-miniconda@v2 | ||
| with: | ||
| activate-environment: fastpdb-dev | ||
| auto-update-conda: true | ||
| python-version: ${{ matrix.python }} | ||
| - name: Installing dependencies | ||
| run: conda install -c conda-forge numpy=$NUMPY_VERSION maturin | ||
| env: | ||
| NUMPY_VERSION: ${{ matrix.numpy }} | ||
| - if: ${{ !matrix.source }} | ||
| name: Building distribution | ||
| run: maturin build --release --no-sdist -i python -o dist | ||
| - if: ${{ matrix.source }} | ||
| name: Building distribution | ||
| run: maturin build --release -i python -o dist | ||
| - uses: actions/upload-artifact@v2 | ||
| with: | ||
| name: fastpdb distribution | ||
| path: dist//* | ||
| if-no-files-found: error |
| name: Tests | ||
| on: [push, pull_request] | ||
| jobs: | ||
| test-simple: | ||
| name: Testing | ||
| strategy: | ||
| matrix: | ||
| include: | ||
| - os: ubuntu-18.04 | ||
| python: '3.7' | ||
| numpy: '1.15' | ||
| - os: ubuntu-18.04 | ||
| python: '3.8' | ||
| numpy: '1.16' | ||
| - os: ubuntu-18.04 | ||
| python: '3.9' | ||
| numpy: '1.19' | ||
| - os: macos-latest | ||
| python: '3.7' | ||
| numpy: '1.15' | ||
| - os: macos-latest | ||
| python: '3.8' | ||
| numpy: '1.16' | ||
| - os: macos-latest | ||
| python: '3.9' | ||
| numpy: '1.19' | ||
| - os: windows-latest | ||
| python: '3.7' | ||
| numpy: '1.15' | ||
| - os: windows-latest | ||
| python: '3.8' | ||
| numpy: '1.16' | ||
| - os: windows-latest | ||
| python: '3.9' | ||
| numpy: '1.19' | ||
| runs-on: ${{ matrix.os }} | ||
| defaults: | ||
| run: | ||
| shell: bash -l {0} | ||
| steps: | ||
| - uses: actions/checkout@v2 | ||
| - uses: conda-incubator/setup-miniconda@v2 | ||
| with: | ||
| activate-environment: fastpdb-test | ||
| auto-update-conda: true | ||
| python-version: ${{ matrix.python }} | ||
| - name: Installing dependencies | ||
| run: conda install -c conda-forge numpy=$NUMPY_VERSION "biotite>=0.29" maturin pytest | ||
| env: | ||
| NUMPY_VERSION: ${{ matrix.numpy }} | ||
| - name: Building distribution | ||
| run: maturin build --release -i python -o dist | ||
| - name: Installing distribution | ||
| run: pip install .//dist//*.whl | ||
| - name: Testing code | ||
| run: pytest --assert=plain |
| __name__ = "fastpdb" | ||
| __author__ = "Patrick Kunzmann" | ||
| __all__ = ["PDBFile"] | ||
| __version__ = "1.0.0" | ||
| import numpy as np | ||
| import biotite | ||
| import biotite.structure as struc | ||
| import biotite.structure.io.pdb as pdb | ||
| from .fastpdb import PDBFile as RustPDBFile | ||
| class PDBFile(biotite.TextFile): | ||
| r""" | ||
| This class represents a PDB file. | ||
| This class only provides support for reading/writing the pure atom | ||
| information (``ATOM``, ``HETATM``, ``MODEL`` and ``ENDMDL`` | ||
| records). | ||
| ``TER`` records cannot be written. | ||
| See also | ||
| -------- | ||
| PDBxFile | ||
| Examples | ||
| -------- | ||
| Load a ``\\*.pdb`` file, modify the structure and save the new | ||
| structure into a new file: | ||
| >>> import os.path | ||
| >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb")) | ||
| >>> array_stack = file.get_structure() | ||
| >>> array_stack_mod = rotate(array_stack, [1,2,3]) | ||
| >>> file = PDBFile() | ||
| >>> file.set_structure(array_stack_mod) | ||
| >>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb")) | ||
| """ | ||
| def __init__(self): | ||
| super().__init__() | ||
| self._pdb_file = RustPDBFile([]) | ||
| @classmethod | ||
| def read(cls, file): | ||
| file = super().read(file) | ||
| file._pdb_file = RustPDBFile(file.lines) | ||
| return file | ||
| def get_model_count(self): | ||
| """ | ||
| Get the number of models contained in the PDB file. | ||
| Returns | ||
| ------- | ||
| model_count : int | ||
| The number of models. | ||
| """ | ||
| return self._pdb_file.get_model_count() | ||
| def get_coord(self, model=None): | ||
| """ | ||
| Get only the coordinates of the PDB file. | ||
| Parameters | ||
| ---------- | ||
| model : int, optional | ||
| If this parameter is given, the function will return a | ||
| 2D coordinate array from the atoms corresponding to the | ||
| given model number (starting at 1). | ||
| Negative values are used to index models starting from the | ||
| last model insted of the first model. | ||
| If this parameter is omitted, an 2D coordinate array | ||
| containing all models will be returned, even if | ||
| the structure contains only one model. | ||
| Returns | ||
| ------- | ||
| coord : ndarray, shape=(m,n,3) or shape=(n,2), dtype=float | ||
| The coordinates read from the ``ATOM`` and ``HETATM`` | ||
| records of the file. | ||
| Notes | ||
| ----- | ||
| Note that :func:`get_coord()` may output more coordinates than | ||
| the atom array (stack) from the corresponding | ||
| :func:`get_structure()` call has. | ||
| The reason for this is, that :func:`get_structure()` filters | ||
| *altloc* IDs, while `get_coord()` does not. | ||
| Examples | ||
| -------- | ||
| Read an :class:`AtomArrayStack` from multiple PDB files, where | ||
| each PDB file contains the same atoms but different positions. | ||
| This is an efficient approach when a trajectory is spread into | ||
| multiple PDB files, as done e.g. by the *Rosetta* modeling | ||
| software. | ||
| For the purpose of this example, the PDB files are created from | ||
| an existing :class:`AtomArrayStack`. | ||
| >>> import os.path | ||
| >>> from tempfile import gettempdir | ||
| >>> file_names = [] | ||
| >>> for i in range(atom_array_stack.stack_depth()): | ||
| ... pdb_file = PDBFile() | ||
| ... pdb_file.set_structure(atom_array_stack[i]) | ||
| ... file_name = os.path.join(gettempdir(), f"model_{i+1}.pdb") | ||
| ... pdb_file.write(file_name) | ||
| ... file_names.append(file_name) | ||
| >>> print(file_names) | ||
| ['...model_1.pdb', '...model_2.pdb', ..., '...model_38.pdb'] | ||
| Now the PDB files are used to create an :class:`AtomArrayStack`, | ||
| where each model represents a different model. | ||
| Construct a new :class:`AtomArrayStack` with annotations taken | ||
| from one of the created files used as template and coordinates | ||
| from all of the PDB files. | ||
| >>> template_file = PDBFile.read(file_names[0]) | ||
| >>> template = template_file.get_structure() | ||
| >>> coord = [] | ||
| >>> for i, file_name in enumerate(file_names): | ||
| ... pdb_file = PDBFile.read(file_name) | ||
| ... coord.append(pdb_file.get_coord(model=1)) | ||
| >>> new_stack = from_template(template, np.array(coord)) | ||
| The newly created :class:`AtomArrayStack` should now be equal to | ||
| the :class:`AtomArrayStack` the PDB files were created from. | ||
| >>> print(np.allclose(new_stack.coord, atom_array_stack.coord)) | ||
| True | ||
| """ | ||
| if model is None: | ||
| coord = self._pdb_file.parse_coord_multi_model() | ||
| else: | ||
| coord = self._pdb_file.parse_coord_single_model(model) | ||
| return coord | ||
| def get_structure(self, model=None, altloc="first", extra_fields=None, include_bonds=False): | ||
| """ | ||
| Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file. | ||
| Parameters | ||
| ---------- | ||
| model : int, optional | ||
| If this parameter is given, the function will return an | ||
| :class:`AtomArray` from the atoms corresponding to the given | ||
| model number (starting at 1). | ||
| Negative values are used to index models starting from the | ||
| last model insted of the first model. | ||
| If this parameter is omitted, an :class:`AtomArrayStack` | ||
| containing all models will be returned, even if the | ||
| structure contains only one model. | ||
| altloc : {'first', 'occupancy', 'all'} | ||
| This parameter defines how *altloc* IDs are handled: | ||
| - ``'first'`` - Use atoms that have the first | ||
| *altloc* ID appearing in a residue. | ||
| - ``'occupancy'`` - Use atoms that have the *altloc* ID | ||
| with the highest occupancy for a residue. | ||
| - ``'all'`` - Use all atoms. | ||
| Note that this leads to duplicate atoms. | ||
| When this option is chosen, the ``altloc_id`` | ||
| annotation array is added to the returned structure. | ||
| extra_fields : list of str, optional | ||
| The strings in the list are optional annotation categories | ||
| that should be stored in the output array or stack. | ||
| These are valid values: | ||
| ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and | ||
| ``'charge'``. | ||
| include_bonds : bool, optional | ||
| If set to true, a :class:`BondList` will be created for the | ||
| resulting :class:`AtomArray` containing the bond information | ||
| from the file. | ||
| All bonds have :attr:`BondType.ANY`, since the PDB format | ||
| does not support bond orders. | ||
| Returns | ||
| ------- | ||
| array : AtomArray or AtomArrayStack | ||
| The return type depends on the `model` parameter. | ||
| """ | ||
| if extra_fields is not None: | ||
| include_atom_id = "atom_id" in extra_fields | ||
| include_b_factor = "b_factor" in extra_fields | ||
| include_occupancy = "occupancy" in extra_fields | ||
| include_charge = "charge" in extra_fields | ||
| else: | ||
| include_atom_id = False | ||
| include_b_factor = False | ||
| include_occupancy = False | ||
| include_charge = False | ||
| if include_bonds: | ||
| # Required for mapping the bonded atom IDs to atom indices | ||
| include_atom_id = True | ||
| if altloc == "occupancy": | ||
| include_occupancy = True | ||
| if model is None: | ||
| coord = self._pdb_file.parse_coord_multi_model() | ||
| annotations = self._pdb_file.parse_annotations( | ||
| 1, | ||
| include_atom_id, include_b_factor, | ||
| include_occupancy, include_charge | ||
| ) | ||
| else: | ||
| coord = self._pdb_file.parse_coord_single_model(model) | ||
| annotations = self._pdb_file.parse_annotations( | ||
| model, | ||
| include_atom_id, include_b_factor, | ||
| include_occupancy, include_charge | ||
| ) | ||
| ( | ||
| chain_id, res_id, ins_code, res_name, | ||
| hetero, atom_name, element, altloc_id, | ||
| atom_id, b_factor, occupancy, charge | ||
| ) = annotations | ||
| # Interpret uint32 arrays as unicode arrays | ||
| chain_id = np.frombuffer(chain_id, dtype="U4") | ||
| ins_code = np.frombuffer(ins_code, dtype="U1") | ||
| res_name = np.frombuffer(res_name, dtype="U3") | ||
| atom_name = np.frombuffer(atom_name, dtype="U6") | ||
| element = np.frombuffer(element, dtype="U2") | ||
| altloc_id = np.frombuffer(altloc_id, dtype="U1") | ||
| if coord.ndim == 3: | ||
| atoms = struc.AtomArrayStack(coord.shape[0], coord.shape[1]) | ||
| atoms.coord = coord | ||
| else: | ||
| atoms = struc.AtomArray(coord.shape[0]) | ||
| atoms.coord = coord | ||
| atoms.chain_id = chain_id | ||
| atoms.res_id = res_id | ||
| atoms.ins_code = ins_code | ||
| atoms.res_name = res_name | ||
| atoms.hetero = hetero | ||
| atoms.atom_name = atom_name | ||
| atoms.element = element | ||
| for field in (extra_fields if extra_fields is not None else []): | ||
| if field == "atom_id": | ||
| # Copy is necessary to avoid double masking in | ||
| # later altloc ID filtering | ||
| atoms.set_annotation("atom_id", atom_id.copy()) | ||
| elif field == "charge": | ||
| atoms.set_annotation("charge", charge) | ||
| elif field == "occupancy": | ||
| atoms.set_annotation("occupancy", occupancy) | ||
| elif field == "b_factor": | ||
| atoms.set_annotation("b_factor", b_factor) | ||
| else: | ||
| raise ValueError(f"Unknown extra field: {field}") | ||
| box = self._pdb_file.parse_box() | ||
| if box is None: | ||
| atoms.box = None | ||
| else: | ||
| len_a, len_b, len_c, alpha, beta, gamma = box | ||
| box = struc.vectors_from_unitcell( | ||
| len_a, len_b, len_c, | ||
| np.deg2rad(alpha), np.deg2rad(beta), np.deg2rad(gamma) | ||
| ) | ||
| if isinstance(atoms, struc.AtomArray): | ||
| atoms.box = box | ||
| else: | ||
| atoms.box = np.repeat( | ||
| box[np.newaxis, ...], atoms.stack_depth(), axis=0 | ||
| ) | ||
| # Filter altloc IDs | ||
| if altloc == "occupancy": | ||
| filter = struc.filter_highest_occupancy_altloc( | ||
| atoms, altloc_id, occupancy | ||
| ) | ||
| atoms = atoms[..., filter] | ||
| atom_id = atom_id[filter] if atom_id is not None else None | ||
| elif altloc == "first": | ||
| filter = struc.filter_first_altloc(atoms, altloc_id) | ||
| atoms = atoms[..., filter] | ||
| atom_id = atom_id[filter] if atom_id is not None else None | ||
| elif altloc == "all": | ||
| atoms.set_annotation("altloc_id", altloc_id) | ||
| else: | ||
| raise ValueError(f"'{altloc}' is not a valid 'altloc' option") | ||
| if include_bonds: | ||
| bond_list = struc.BondList( | ||
| atoms.array_length(), self._pdb_file.parse_bonds(atom_id) | ||
| ) | ||
| bond_list = bond_list.merge(struc.connect_via_residue_names( | ||
| atoms, | ||
| # The information for non-hetero residues and water | ||
| # are not part of CONECT records | ||
| (~atoms.hetero) | struc.filter_solvent(atoms) | ||
| )) | ||
| # Remove bond order from inter residue bonds for consistency | ||
| bond_list.remove_bond_order() | ||
| atoms.bonds = bond_list | ||
| return atoms | ||
| def set_structure(self, atoms): | ||
| """ | ||
| Set the :class:`AtomArray` or :class:`AtomArrayStack` for the | ||
| file. | ||
| This makes also use of the optional annotation arrays | ||
| ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``. | ||
| If the atom array (stack) contains the annotation ``'atom_id'``, | ||
| these values will be used for atom numbering instead of | ||
| continuous numbering. | ||
| Parameters | ||
| ---------- | ||
| array : AtomArray or AtomArrayStack | ||
| The array or stack to be saved into this file. If a stack | ||
| is given, each array in the stack is saved as separate | ||
| model. | ||
| Notes | ||
| ----- | ||
| If `array` has an associated :class:`BondList`, ``CONECT`` | ||
| records are also written for all non-water hetero residues | ||
| and all inter-residue connections. | ||
| """ | ||
| # Reset lines of text | ||
| self._pdb_file = RustPDBFile([]) | ||
| # Write 'CRYST1' record | ||
| if atoms.box is not None: | ||
| box = atoms.box | ||
| if box.ndim == 3: | ||
| box = box[0] | ||
| len_a, len_b, len_c, alpha, beta, gamma \ | ||
| = struc.unitcell_from_vectors(box) | ||
| self._pdb_file.write_box( | ||
| len_a, len_b, len_c, | ||
| np.rad2deg(alpha), np.rad2deg(beta), np.rad2deg(gamma) | ||
| ) | ||
| # Write 'ATOM' and 'MODEL' records | ||
| # Convert Unicode arrays into uint32 arrays for usage in Rust | ||
| chain_id = np.frombuffer(atoms.chain_id, dtype=np.uint32).reshape(-1, 4) | ||
| ins_code = np.frombuffer(atoms.ins_code, dtype=np.uint32).reshape(-1, 1) | ||
| res_name = np.frombuffer(atoms.res_name, dtype=np.uint32).reshape(-1, 3) | ||
| atom_name = np.frombuffer(atoms.atom_name, dtype=np.uint32).reshape(-1, 6) | ||
| element = np.frombuffer(atoms.element, dtype=np.uint32).reshape(-1, 2) | ||
| categories = atoms.get_annotation_categories() | ||
| atom_id = atoms.atom_id if "atom_id" in categories else None | ||
| b_factor = atoms.b_factor if "b_factor" in categories else None | ||
| occupancy = atoms.occupancy if "occupancy" in categories else None | ||
| charge = atoms.charge if "charge" in categories else None | ||
| # Convert to correct dtype for Rust function call, if necessary | ||
| coord = atoms.coord.astype(np.float32, copy=False) | ||
| res_id = atoms.res_id.astype(np.int64, copy=False) | ||
| hetero = atoms.hetero.astype(bool, copy=False) | ||
| if atom_id is not None: | ||
| atom_id = atom_id.astype(np.int64, copy=False) | ||
| if b_factor is not None: | ||
| b_factor = b_factor.astype(np.float64, copy=False) | ||
| if occupancy is not None: | ||
| occupancy = occupancy.astype(np.float64, copy=False) | ||
| if charge is not None: | ||
| charge = charge.astype(np.int64, copy=False) | ||
| if isinstance(atoms, struc.AtomArray): | ||
| self._pdb_file.write_single_model( | ||
| coord, chain_id, res_id, ins_code, | ||
| res_name, hetero, atom_name, element, | ||
| atom_id, b_factor, occupancy, charge | ||
| ) | ||
| elif isinstance(atoms, struc.AtomArrayStack): | ||
| self._pdb_file.write_multi_model( | ||
| coord, chain_id, res_id, ins_code, | ||
| res_name, hetero, atom_name, element, | ||
| atom_id, b_factor, occupancy, charge | ||
| ) | ||
| else: | ||
| raise TypeError( | ||
| f"Expected AtomArray or AtomArrayStack, " | ||
| f"but got {type(atoms).__name__}" | ||
| ) | ||
| # Write 'CONECT' records | ||
| if atoms.bonds is not None: | ||
| # Only non-water hetero records and connections between | ||
| # residues are added to the records | ||
| hetero_indices = np.where(atoms.hetero & ~struc.filter_solvent(atoms))[0] | ||
| bond_array = atoms.bonds.as_array() | ||
| bond_array = bond_array[ | ||
| np.isin(bond_array[:,0], hetero_indices) | | ||
| np.isin(bond_array[:,1], hetero_indices) | | ||
| (atoms.res_id [bond_array[:,0]] != atoms.res_id [bond_array[:,1]]) | | ||
| (atoms.chain_id[bond_array[:,0]] != atoms.chain_id[bond_array[:,1]]) | ||
| ] | ||
| # Bond type is unused since PDB does not support bond orders | ||
| bonds, _ = struc.BondList( | ||
| atoms.array_length(), bond_array | ||
| ).get_all_bonds() | ||
| atom_id = np.arange(1, atoms.array_length()+1, dtype=np.int64) \ | ||
| if atom_id is None else atom_id | ||
| self._pdb_file.write_bonds( | ||
| bonds.astype(np.int32, copy=False), atom_id | ||
| ) | ||
| self.lines = self._pdb_file.lines |
+1
-3
@@ -23,6 +23,4 @@ import time | ||
| pdb_file_path = rcsb.fetch(PDB_ID, "pdb", tempfile.gettempdir()) | ||
| pdb_file_path = rcsb.fetch(PDB_ID, "pdb", ".") | ||
| #pdb_file_path = rcsb.fetch(PDB_ID, "pdb", tempfile.gettempdir()) | ||
| fastpdb_runtimes = {} | ||
@@ -29,0 +27,0 @@ biotite_runtimes = {} |
+61
-76
@@ -9,3 +9,3 @@ <?xml version="1.0" encoding="utf-8" standalone="no"?> | ||
| <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> | ||
| <dc:date>2021-10-06T16:47:02.199004</dc:date> | ||
| <dc:date>2021-10-07T18:07:40.322632</dc:date> | ||
| <dc:format>image/svg+xml</dc:format> | ||
@@ -42,6 +42,6 @@ <dc:creator> | ||
| <g id="patch_3"> | ||
| <path clip-path="url(#p7e47fec0d4)" d="M 83.987045 256.16 | ||
| <path clip-path="url(#p3f95facf3d)" d="M 83.987045 256.16 | ||
| L 129.611136 256.16 | ||
| L 129.611136 184.500366 | ||
| L 83.987045 184.500366 | ||
| L 129.611136 176.801224 | ||
| L 83.987045 176.801224 | ||
| z | ||
@@ -51,6 +51,6 @@ " style="fill:#0a6efd;stroke:#000000;stroke-linejoin:miter;"/> | ||
| <g id="patch_4"> | ||
| <path clip-path="url(#p7e47fec0d4)" d="M 266.483409 256.16 | ||
| <path clip-path="url(#p3f95facf3d)" d="M 266.483409 256.16 | ||
| L 312.1075 256.16 | ||
| L 312.1075 149.988014 | ||
| L 266.483409 149.988014 | ||
| L 312.1075 128.837933 | ||
| L 266.483409 128.837933 | ||
| z | ||
@@ -60,3 +60,3 @@ " style="fill:#0a6efd;stroke:#000000;stroke-linejoin:miter;"/> | ||
| <g id="patch_5"> | ||
| <path clip-path="url(#p7e47fec0d4)" d="M 448.979773 256.16 | ||
| <path clip-path="url(#p3f95facf3d)" d="M 448.979773 256.16 | ||
| L 494.603864 256.16 | ||
@@ -69,6 +69,6 @@ L 494.603864 35.069091 | ||
| <g id="patch_6"> | ||
| <path clip-path="url(#p7e47fec0d4)" d="M 129.611136 256.16 | ||
| <path clip-path="url(#p3f95facf3d)" d="M 129.611136 256.16 | ||
| L 175.235227 256.16 | ||
| L 175.235227 236.952723 | ||
| L 129.611136 236.952723 | ||
| L 175.235227 235.507445 | ||
| L 129.611136 235.507445 | ||
| z | ||
@@ -78,6 +78,6 @@ " style="fill:#e1301d;stroke:#000000;stroke-linejoin:miter;"/> | ||
| <g id="patch_7"> | ||
| <path clip-path="url(#p7e47fec0d4)" d="M 312.1075 256.16 | ||
| <path clip-path="url(#p3f95facf3d)" d="M 312.1075 256.16 | ||
| L 357.731591 256.16 | ||
| L 357.731591 236.952723 | ||
| L 312.1075 236.952723 | ||
| L 357.731591 235.507445 | ||
| L 312.1075 235.507445 | ||
| z | ||
@@ -87,6 +87,6 @@ " style="fill:#e1301d;stroke:#000000;stroke-linejoin:miter;"/> | ||
| <g id="patch_8"> | ||
| <path clip-path="url(#p7e47fec0d4)" d="M 494.603864 256.16 | ||
| <path clip-path="url(#p3f95facf3d)" d="M 494.603864 256.16 | ||
| L 540.227955 256.16 | ||
| L 540.227955 236.952723 | ||
| L 494.603864 236.952723 | ||
| L 540.227955 235.507445 | ||
| L 494.603864 235.507445 | ||
| z | ||
@@ -101,6 +101,6 @@ " style="fill:#e1301d;stroke:#000000;stroke-linejoin:miter;"/> | ||
| L 0 3.5 | ||
| " id="m1acca59ad8" style="stroke:#000000;stroke-width:0.8;"/> | ||
| " id="m37efa55f1f" style="stroke:#000000;stroke-width:0.8;"/> | ||
| </defs> | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="129.611136" xlink:href="#m1acca59ad8" y="256.16"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="129.611136" xlink:href="#m37efa55f1f" y="256.16"/> | ||
| </g> | ||
@@ -301,3 +301,3 @@ </g> | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="312.1075" xlink:href="#m1acca59ad8" y="256.16"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="312.1075" xlink:href="#m37efa55f1f" y="256.16"/> | ||
| </g> | ||
@@ -363,3 +363,3 @@ </g> | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="494.603864" xlink:href="#m1acca59ad8" y="256.16"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="494.603864" xlink:href="#m37efa55f1f" y="256.16"/> | ||
| </g> | ||
@@ -443,6 +443,6 @@ </g> | ||
| L -3.5 0 | ||
| " id="me6d8a45849" style="stroke:#000000;stroke-width:0.8;"/> | ||
| " id="m7fcad94283" style="stroke:#000000;stroke-width:0.8;"/> | ||
| </defs> | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="236.952723"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="235.507445"/> | ||
| </g> | ||
@@ -452,3 +452,3 @@ </g> | ||
| <!-- 1× --> | ||
| <g transform="translate(36.484375 241.511786)scale(0.12 -0.12)"> | ||
| <g transform="translate(36.484375 240.066507)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -493,3 +493,3 @@ <path d="M 794 531 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="217.745446"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="214.854889"/> | ||
| </g> | ||
@@ -499,3 +499,3 @@ </g> | ||
| <!-- 2× --> | ||
| <g transform="translate(36.484375 222.304509)scale(0.12 -0.12)"> | ||
| <g transform="translate(36.484375 219.413952)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -535,3 +535,3 @@ <path d="M 1228 531 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="198.53817"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="194.202334"/> | ||
| </g> | ||
@@ -541,3 +541,3 @@ </g> | ||
| <!-- 3× --> | ||
| <g transform="translate(36.484375 203.097232)scale(0.12 -0.12)"> | ||
| <g transform="translate(36.484375 198.761396)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -585,3 +585,3 @@ <path d="M 2597 2516 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="179.330893"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="173.549778"/> | ||
| </g> | ||
@@ -591,3 +591,3 @@ </g> | ||
| <!-- 4× --> | ||
| <g transform="translate(36.484375 183.889955)scale(0.12 -0.12)"> | ||
| <g transform="translate(36.484375 178.108841)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -622,3 +622,3 @@ <path d="M 2419 4116 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="160.123616"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="152.897223"/> | ||
| </g> | ||
@@ -628,3 +628,3 @@ </g> | ||
| <!-- 5× --> | ||
| <g transform="translate(36.484375 164.682679)scale(0.12 -0.12)"> | ||
| <g transform="translate(36.484375 157.456285)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -665,3 +665,3 @@ <path d="M 691 4666 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="140.916339"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="132.244667"/> | ||
| </g> | ||
@@ -671,3 +671,3 @@ </g> | ||
| <!-- 6× --> | ||
| <g transform="translate(36.484375 145.475402)scale(0.12 -0.12)"> | ||
| <g transform="translate(36.484375 136.80373)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -713,3 +713,3 @@ <path d="M 2113 2584 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="121.709063"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="111.592112"/> | ||
| </g> | ||
@@ -719,3 +719,3 @@ </g> | ||
| <!-- 7× --> | ||
| <g transform="translate(36.484375 126.268125)scale(0.12 -0.12)"> | ||
| <g transform="translate(36.484375 116.151174)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -741,3 +741,3 @@ <path d="M 525 4666 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="102.501786"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="90.939556"/> | ||
| </g> | ||
@@ -747,3 +747,3 @@ </g> | ||
| <!-- 8× --> | ||
| <g transform="translate(36.484375 107.060848)scale(0.12 -0.12)"> | ||
| <g transform="translate(36.484375 95.498619)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -798,3 +798,3 @@ <path d="M 2034 2216 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="83.294509"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="70.287001"/> | ||
| </g> | ||
@@ -804,3 +804,3 @@ </g> | ||
| <!-- 9× --> | ||
| <g transform="translate(36.484375 87.853572)scale(0.12 -0.12)"> | ||
| <g transform="translate(36.484375 74.846063)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -846,3 +846,3 @@ <path d="M 703 97 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="64.087232"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="49.634445"/> | ||
| </g> | ||
@@ -852,3 +852,3 @@ </g> | ||
| <!-- 10× --> | ||
| <g transform="translate(28.849375 68.646295)scale(0.12 -0.12)"> | ||
| <g transform="translate(28.849375 54.193508)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -886,3 +886,3 @@ <path d="M 2034 4250 | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="44.879956"/> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="28.98189"/> | ||
| </g> | ||
@@ -892,3 +892,3 @@ </g> | ||
| <!-- 11× --> | ||
| <g transform="translate(28.849375 49.439018)scale(0.12 -0.12)"> | ||
| <g transform="translate(28.849375 33.540952)scale(0.12 -0.12)"> | ||
| <use xlink:href="#DejaVuSans-31"/> | ||
@@ -900,18 +900,3 @@ <use x="63.623047" xlink:href="#DejaVuSans-31"/> | ||
| </g> | ||
| <g id="ytick_12"> | ||
| <g id="line2d_15"> | ||
| <g> | ||
| <use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="25.672679"/> | ||
| </g> | ||
| </g> | ||
| <g id="text_15"> | ||
| <!-- 12× --> | ||
| <g transform="translate(28.849375 30.231741)scale(0.12 -0.12)"> | ||
| <use xlink:href="#DejaVuSans-31"/> | ||
| <use x="63.623047" xlink:href="#DejaVuSans-32"/> | ||
| <use x="127.246094" xlink:href="#DejaVuSans-d7"/> | ||
| </g> | ||
| </g> | ||
| </g> | ||
| <g id="text_16"> | ||
| <g id="text_15"> | ||
| <!-- Speedup --> | ||
@@ -1030,5 +1015,5 @@ <g transform="translate(22.35375 160.9825)rotate(-90)scale(0.12 -0.12)"> | ||
| </g> | ||
| <g id="text_17"> | ||
| <!-- 3.7× --> | ||
| <g transform="translate(92.229403 179.004741)scale(0.12 -0.12)"> | ||
| <g id="text_16"> | ||
| <!-- 3.8× --> | ||
| <g transform="translate(92.229403 171.305599)scale(0.12 -0.12)"> | ||
| <defs> | ||
@@ -1045,22 +1030,22 @@ <path d="M 684 794 | ||
| <use x="63.623047" xlink:href="#DejaVuSans-2e"/> | ||
| <use x="95.410156" xlink:href="#DejaVuSans-37"/> | ||
| <use x="95.410156" xlink:href="#DejaVuSans-38"/> | ||
| <use x="159.033203" xlink:href="#DejaVuSans-d7"/> | ||
| </g> | ||
| </g> | ||
| <g id="text_18"> | ||
| <!-- 5.5× --> | ||
| <g transform="translate(274.725767 144.492389)scale(0.12 -0.12)"> | ||
| <use xlink:href="#DejaVuSans-35"/> | ||
| <g id="text_17"> | ||
| <!-- 6.2× --> | ||
| <g transform="translate(274.725767 123.342308)scale(0.12 -0.12)"> | ||
| <use xlink:href="#DejaVuSans-36"/> | ||
| <use x="63.623047" xlink:href="#DejaVuSans-2e"/> | ||
| <use x="95.410156" xlink:href="#DejaVuSans-35"/> | ||
| <use x="95.410156" xlink:href="#DejaVuSans-32"/> | ||
| <use x="159.033203" xlink:href="#DejaVuSans-d7"/> | ||
| </g> | ||
| </g> | ||
| <g id="text_19"> | ||
| <!-- 11.5× --> | ||
| <g id="text_18"> | ||
| <!-- 10.7× --> | ||
| <g transform="translate(453.404631 29.573466)scale(0.12 -0.12)"> | ||
| <use xlink:href="#DejaVuSans-31"/> | ||
| <use x="63.623047" xlink:href="#DejaVuSans-31"/> | ||
| <use x="63.623047" xlink:href="#DejaVuSans-30"/> | ||
| <use x="127.246094" xlink:href="#DejaVuSans-2e"/> | ||
| <use x="159.033203" xlink:href="#DejaVuSans-35"/> | ||
| <use x="159.033203" xlink:href="#DejaVuSans-37"/> | ||
| <use x="222.65625" xlink:href="#DejaVuSans-d7"/> | ||
@@ -1078,3 +1063,3 @@ </g> | ||
| </g> | ||
| <g id="text_20"> | ||
| <g id="text_19"> | ||
| <!-- fastpdb --> | ||
@@ -1179,3 +1164,3 @@ <g transform="translate(105.575 32.878125)scale(0.12 -0.12)"> | ||
| </g> | ||
| <g id="text_21"> | ||
| <g id="text_20"> | ||
| <!-- biotite --> | ||
@@ -1196,3 +1181,3 @@ <g transform="translate(105.575 50.491875)scale(0.12 -0.12)"> | ||
| <defs> | ||
| <clipPath id="p7e47fec0d4"> | ||
| <clipPath id="p3f95facf3d"> | ||
| <rect height="243.2" width="501.865" x="61.175" y="12.96"/> | ||
@@ -1199,0 +1184,0 @@ </clipPath> |
+4
-1
| [package] | ||
| name = "fastpdb" | ||
| version = "0.1.0" | ||
| version = "1.0.0" | ||
| edition = "2018" | ||
| [package.metadata.maturin] | ||
| python-source = "python-src" | ||
| [dependencies] | ||
@@ -7,0 +10,0 @@ numpy = "0.14" |
+5
-5
| Metadata-Version: 2.1 | ||
| Name: fastpdb | ||
| Version: 0.1.0 | ||
| Classifier: Development Status :: 1 - Planning | ||
| Version: 1.0.0 | ||
| Classifier: Development Status :: 5 - Production/Stable | ||
| Classifier: Intended Audience :: Science/Research | ||
@@ -50,6 +50,6 @@ Classifier: License :: OSI Approved :: BSD License | ||
| Description-Content-Type: text/x-rst; charset=UTF-8 | ||
| Project-URL: homepage, https://github.com/biotite-dev/fastpdb | ||
| Project-URL: repository, https://github.com/biotite-dev/fastpdb | ||
| Project-URL: homepage, https://github.com/biotite-dev/fastpdb | ||
| .. image:: logo.svg | ||
| .. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/logo.svg | ||
| :width: 300 | ||
@@ -100,5 +100,5 @@ :align: center | ||
| .. image:: benchmark.svg | ||
| .. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/benchmark.svg | ||
| :width: 800 | ||
| :align: center | ||
+2
-2
| [project] | ||
| name = "fastpdb" | ||
| version = "0.1.0" | ||
| version = "1.0.0" | ||
| description = "A high performance drop-in replacement for Biotite's PDBFile." | ||
@@ -16,3 +16,3 @@ readme = "README.rst" | ||
| classifiers = [ | ||
| "Development Status :: 1 - Planning", | ||
| "Development Status :: 5 - Production/Stable", | ||
| "Intended Audience :: Science/Research", | ||
@@ -19,0 +19,0 @@ "License :: OSI Approved :: BSD License", |
+2
-2
@@ -1,2 +0,2 @@ | ||
| .. image:: logo.svg | ||
| .. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/logo.svg | ||
| :width: 300 | ||
@@ -47,4 +47,4 @@ :align: center | ||
| .. image:: benchmark.svg | ||
| .. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/benchmark.svg | ||
| :width: 800 | ||
| :align: center |
+35
-44
@@ -6,2 +6,3 @@ //! Low-level PDB file parsing and writing. | ||
| use std::collections::HashMap; | ||
| use std::cmp::Ordering; | ||
| use ndarray::{Array, Ix1, Ix2, Ix3}; | ||
@@ -52,3 +53,3 @@ use pyo3::prelude::*; | ||
| fn get_model_count(&self) -> usize { | ||
| return self.get_model_start_indices().len() | ||
| self.get_model_start_indices().len() | ||
| } | ||
@@ -195,3 +196,3 @@ | ||
| write_string_to_array(&mut res_name, atom_i, line[17..20].trim()); | ||
| hetero[atom_i] = if &line[0..4] == "ATOM" { false } else { true }; | ||
| hetero[atom_i] = !(&line[0..4] == "ATOM"); | ||
| write_string_to_array(&mut atom_name, atom_i, line[12..16].trim()); | ||
@@ -220,3 +221,3 @@ write_string_to_array(&mut element, atom_i, line[76..78].trim()); | ||
| else { | ||
| number = raw_number.to_digit(10).ok_or( | ||
| number = raw_number.to_digit(10).ok_or_else( || | ||
| BadStructureError::new_err(format!( | ||
@@ -359,11 +360,7 @@ "'{}' cannot be parsed into a number", raw_number | ||
| let c = arr[i]; | ||
| if c > 0 { | ||
| format!("{:1}+", c) | ||
| match c.cmp(&0) { | ||
| Ordering::Greater => format!("{:1}+", c), | ||
| Ordering::Less => format!("{:1}-", -c), | ||
| Ordering::Equal => String::from(" ") | ||
| } | ||
| else if c < 0 { | ||
| format!("{:1}-", -c) | ||
| } | ||
| else { | ||
| String::from(" ") | ||
| } | ||
| }), | ||
@@ -430,11 +427,7 @@ )); | ||
| let c = arr[i]; | ||
| if c > 0 { | ||
| format!("{:1}+", c) | ||
| match c.cmp(&0) { | ||
| Ordering::Greater => format!("{:1}+", c), | ||
| Ordering::Less => format!("{:1}-", -c), | ||
| Ordering::Equal => String::from(" ") | ||
| } | ||
| else if c < 0 { | ||
| format!("{:1}-", -c) | ||
| } | ||
| else { | ||
| String::from(" ") | ||
| } | ||
| }), | ||
@@ -528,16 +521,16 @@ )); | ||
| } | ||
| coord[[atom_i, 0]] = line[30..38].trim().parse().or_else(|_| | ||
| Err(BadStructureError::new_err(format!( | ||
| coord[[atom_i, 0]] = line[30..38].trim().parse().map_err(|_| | ||
| BadStructureError::new_err(format!( | ||
| "'{}' cannot be parsed into a float", line[30..38].trim() | ||
| ))) | ||
| )) | ||
| )?; | ||
| coord[[atom_i, 1]] = line[38..46].trim().parse().or_else(|_| | ||
| Err(BadStructureError::new_err(format!( | ||
| coord[[atom_i, 1]] = line[38..46].trim().parse().map_err(|_| | ||
| BadStructureError::new_err(format!( | ||
| "'{}' cannot be parsed into a float", line[38..46].trim() | ||
| ))) | ||
| )) | ||
| )?; | ||
| coord[[atom_i, 2]] = line[46..54].trim().parse().or_else(|_| | ||
| Err(BadStructureError::new_err(format!( | ||
| coord[[atom_i, 2]] = line[46..54].trim().parse().map_err(|_| | ||
| BadStructureError::new_err(format!( | ||
| "'{}' cannot be parsed into a float", line[46..54].trim() | ||
| ))) | ||
| )) | ||
| )?; | ||
@@ -581,3 +574,3 @@ } | ||
| // In these cases model starting index is set to 0 | ||
| if model_start_i.len() == 0 { | ||
| if model_start_i.is_empty() { | ||
| model_start_i = vec![0] | ||
@@ -593,13 +586,11 @@ } | ||
| model: isize, | ||
| model_start_i: &Vec<usize>) -> PyResult<(usize, usize)> { | ||
| model_start_i: &[usize]) -> PyResult<(usize, usize)> { | ||
| let model_i: isize; | ||
| if model > 0 { | ||
| model_i = model - 1; | ||
| } | ||
| else if model < 0 { | ||
| model_i = model_start_i.len() as isize + model; | ||
| } | ||
| else { | ||
| return Err(exceptions::PyValueError::new_err("Model index must not be 0")); | ||
| } | ||
| match model.cmp(&0) { | ||
| Ordering::Greater => model_i = model - 1, | ||
| Ordering::Less => model_i = model_start_i.len() as isize + model, | ||
| Ordering::Equal => return Err(exceptions::PyValueError::new_err( | ||
| "Model index must not be 0" | ||
| )), | ||
| }; | ||
@@ -626,4 +617,4 @@ if model_i >= model_start_i.len() as isize || model_i < 0 { | ||
| fn get_model_length(&self, | ||
| model_start_i: &Vec<usize>, | ||
| atom_line_i: &Vec<usize>) -> PyResult<usize> { | ||
| model_start_i: &[usize], | ||
| atom_line_i: &[usize]) -> PyResult<usize> { | ||
| let n_models = model_start_i.len(); | ||
@@ -687,6 +678,6 @@ let mut length: Option<usize> = None; | ||
| fn parse_number<T: FromStr>(string: &str) -> PyResult<T> { | ||
| string.trim().parse().or_else(|_| | ||
| Err(BadStructureError::new_err(format!( | ||
| string.trim().parse().map_err(|_| | ||
| BadStructureError::new_err(format!( | ||
| "'{}' cannot be parsed into a number", string.trim() | ||
| ))) | ||
| )) | ||
| ) | ||
@@ -693,0 +684,0 @@ } |
| __name__ = "fastpdb" | ||
| __author__ = "Patrick Kunzmann" | ||
| __all__ = ["PDBFile"] | ||
| __version__ = "0.1.0" | ||
| import numpy as np | ||
| import biotite | ||
| import biotite.structure as struc | ||
| import biotite.structure.io.pdb as pdb | ||
| from .fastpdb import PDBFile as RustPDBFile | ||
| class PDBFile(biotite.TextFile): | ||
| r""" | ||
| This class represents a PDB file. | ||
| This class only provides support for reading/writing the pure atom | ||
| information (*ATOM*, *HETATM*, *MODEL* and *ENDMDL* records). *TER* | ||
| records cannot be written. | ||
| See also | ||
| -------- | ||
| PDBxFile | ||
| Examples | ||
| -------- | ||
| Load a `\\*.pdb` file, modify the structure and save the new | ||
| structure into a new file: | ||
| >>> import os.path | ||
| >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb")) | ||
| >>> array_stack = file.get_structure() | ||
| >>> array_stack_mod = rotate(array_stack, [1,2,3]) | ||
| >>> file = PDBFile() | ||
| >>> file.set_structure(array_stack_mod) | ||
| >>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb")) | ||
| """ | ||
| def __init__(self): | ||
| super().__init__() | ||
| self._pdb_file = RustPDBFile([]) | ||
| @classmethod | ||
| def read(cls, file): | ||
| file = super().read(file) | ||
| file._pdb_file = RustPDBFile(file.lines) | ||
| return file | ||
| def get_model_count(self): | ||
| """ | ||
| Get the number of models contained in the PDB file. | ||
| Returns | ||
| ------- | ||
| model_count : int | ||
| The number of models. | ||
| """ | ||
| return self._pdb_file.get_model_count() | ||
| def get_coord(self, model=None): | ||
| """ | ||
| Get only the coordinates of the PDB file. | ||
| Parameters | ||
| ---------- | ||
| model : int, optional | ||
| If this parameter is given, the function will return a | ||
| 2D coordinate array from the atoms corresponding to the | ||
| given model number (starting at 1). | ||
| Negative values are used to index models starting from the | ||
| last model insted of the first model. | ||
| If this parameter is omitted, an 2D coordinate array | ||
| containing all models will be returned, even if | ||
| the structure contains only one model. | ||
| Returns | ||
| ------- | ||
| coord : ndarray, shape=(m,n,3) or shape=(n,2), dtype=float | ||
| The coordinates read from the ATOM and HETATM records of the | ||
| file. | ||
| Notes | ||
| ----- | ||
| Note that :func:`get_coord()` may output more coordinates than | ||
| the atom array (stack) from the corresponding | ||
| :func:`get_structure()` call has. | ||
| The reason for this is, that :func:`get_structure()` filters | ||
| *altloc* IDs, while `get_coord()` does not. | ||
| Examples | ||
| -------- | ||
| Read an :class:`AtomArrayStack` from multiple PDB files, where | ||
| each PDB file contains the same atoms but different positions. | ||
| This is an efficient approach when a trajectory is spread into | ||
| multiple PDB files, as done e.g. by the *Rosetta* modeling | ||
| software. | ||
| For the purpose of this example, the PDB files are created from | ||
| an existing :class:`AtomArrayStack`. | ||
| >>> import os.path | ||
| >>> from tempfile import gettempdir | ||
| >>> file_names = [] | ||
| >>> for i in range(atom_array_stack.stack_depth()): | ||
| ... pdb_file = PDBFile() | ||
| ... pdb_file.set_structure(atom_array_stack[i]) | ||
| ... file_name = os.path.join(gettempdir(), f"model_{i+1}.pdb") | ||
| ... pdb_file.write(file_name) | ||
| ... file_names.append(file_name) | ||
| >>> print(file_names) | ||
| ['...model_1.pdb', '...model_2.pdb', ..., '...model_38.pdb'] | ||
| Now the PDB files are used to create an :class:`AtomArrayStack`, | ||
| where each model represents a different model. | ||
| Construct a new :class:`AtomArrayStack` with annotations taken | ||
| from one of the created files used as template and coordinates | ||
| from all of the PDB files. | ||
| >>> template_file = PDBFile.read(file_names[0]) | ||
| >>> template = template_file.get_structure() | ||
| >>> coord = [] | ||
| >>> for i, file_name in enumerate(file_names): | ||
| ... pdb_file = PDBFile.read(file_name) | ||
| ... coord.append(pdb_file.get_coord(model=1)) | ||
| >>> new_stack = from_template(template, np.array(coord)) | ||
| The newly created :class:`AtomArrayStack` should now be equal to | ||
| the :class:`AtomArrayStack` the PDB files were created from. | ||
| >>> print(np.allclose(new_stack.coord, atom_array_stack.coord)) | ||
| True | ||
| """ | ||
| if model is None: | ||
| coord = self._pdb_file.parse_coord_multi_model() | ||
| else: | ||
| coord = self._pdb_file.parse_coord_single_model(model) | ||
| return coord | ||
| def get_structure(self, model=None, altloc="first", extra_fields=None, include_bonds=False): | ||
| """ | ||
| Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file. | ||
| Parameters | ||
| ---------- | ||
| model : int, optional | ||
| If this parameter is given, the function will return an | ||
| :class:`AtomArray` from the atoms corresponding to the given | ||
| model number (starting at 1). | ||
| Negative values are used to index models starting from the | ||
| last model insted of the first model. | ||
| If this parameter is omitted, an :class:`AtomArrayStack` | ||
| containing all models will be returned, even if the | ||
| structure contains only one model. | ||
| altloc : {'first', 'occupancy', 'all'} | ||
| This parameter defines how *altloc* IDs are handled: | ||
| - ``'first'`` - Use atoms that have the first | ||
| *altloc* ID appearing in a residue. | ||
| - ``'occupancy'`` - Use atoms that have the *altloc* ID | ||
| with the highest occupancy for a residue. | ||
| - ``'all'`` - Use all atoms. | ||
| Note that this leads to duplicate atoms. | ||
| When this option is chosen, the ``altloc_id`` | ||
| annotation array is added to the returned structure. | ||
| extra_fields : list of str, optional | ||
| The strings in the list are optional annotation categories | ||
| that should be stored in the output array or stack. | ||
| These are valid values: | ||
| ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and | ||
| ``'charge'``. | ||
| include_bonds : bool, optional | ||
| If set to true, a :class:`BondList` will be created for the | ||
| resulting :class:`AtomArray` containing the bond information | ||
| from the file. | ||
| All bonds have :attr:`BondType.ANY`, since the PDB format | ||
| does not support bond orders. | ||
| Returns | ||
| ------- | ||
| array : AtomArray or AtomArrayStack | ||
| The return type depends on the `model` parameter. | ||
| """ | ||
| if extra_fields is not None: | ||
| include_atom_id = "atom_id" in extra_fields | ||
| include_b_factor = "b_factor" in extra_fields | ||
| include_occupancy = "occupancy" in extra_fields | ||
| include_charge = "charge" in extra_fields | ||
| else: | ||
| include_atom_id = False | ||
| include_b_factor = False | ||
| include_occupancy = False | ||
| include_charge = False | ||
| if include_bonds: | ||
| # Required for mapping the bonded atom IDs to atom indices | ||
| include_atom_id = True | ||
| if altloc == "occupancy": | ||
| include_occupancy = True | ||
| if model is None: | ||
| coord = self._pdb_file.parse_coord_multi_model() | ||
| annotations = self._pdb_file.parse_annotations( | ||
| 1, | ||
| include_atom_id, include_b_factor, | ||
| include_occupancy, include_charge | ||
| ) | ||
| else: | ||
| coord = self._pdb_file.parse_coord_single_model(model) | ||
| annotations = self._pdb_file.parse_annotations( | ||
| model, | ||
| include_atom_id, include_b_factor, | ||
| include_occupancy, include_charge | ||
| ) | ||
| ( | ||
| chain_id, res_id, ins_code, res_name, | ||
| hetero, atom_name, element, altloc_id, | ||
| atom_id, b_factor, occupancy, charge | ||
| ) = annotations | ||
| # Interpret uint32 arrays as unicode arrays | ||
| chain_id = np.frombuffer(chain_id, dtype="U4") | ||
| ins_code = np.frombuffer(ins_code, dtype="U1") | ||
| res_name = np.frombuffer(res_name, dtype="U3") | ||
| atom_name = np.frombuffer(atom_name, dtype="U6") | ||
| element = np.frombuffer(element, dtype="U2") | ||
| altloc_id = np.frombuffer(altloc_id, dtype="U1") | ||
| if coord.ndim == 3: | ||
| atoms = struc.AtomArrayStack(coord.shape[0], coord.shape[1]) | ||
| atoms.coord = coord | ||
| else: | ||
| atoms = struc.AtomArray(coord.shape[0]) | ||
| atoms.coord = coord | ||
| atoms.chain_id = chain_id | ||
| atoms.res_id = res_id | ||
| atoms.ins_code = ins_code | ||
| atoms.res_name = res_name | ||
| atoms.hetero = hetero | ||
| atoms.atom_name = atom_name | ||
| atoms.element = element | ||
| for field in (extra_fields if extra_fields is not None else []): | ||
| if field == "atom_id": | ||
| # Copy is necessary to avoid double masking in | ||
| # later altloc ID filtering | ||
| atoms.set_annotation("atom_id", atom_id.copy()) | ||
| elif field == "charge": | ||
| atoms.set_annotation("charge", charge) | ||
| elif field == "occupancy": | ||
| atoms.set_annotation("occupancy", occupancy) | ||
| elif field == "b_factor": | ||
| atoms.set_annotation("b_factor", b_factor) | ||
| else: | ||
| raise ValueError(f"Unknown extra field: {field}") | ||
| box = self._pdb_file.parse_box() | ||
| if box is None: | ||
| atoms.box = None | ||
| else: | ||
| len_a, len_b, len_c, alpha, beta, gamma = box | ||
| box = struc.vectors_from_unitcell( | ||
| len_a, len_b, len_c, | ||
| np.deg2rad(alpha), np.deg2rad(beta), np.deg2rad(gamma) | ||
| ) | ||
| if isinstance(atoms, struc.AtomArray): | ||
| atoms.box = box | ||
| else: | ||
| atoms.box = np.repeat( | ||
| box[np.newaxis, ...], atoms.stack_depth(), axis=0 | ||
| ) | ||
| # Filter altloc IDs | ||
| if altloc == "occupancy": | ||
| filter = struc.filter_highest_occupancy_altloc( | ||
| atoms, altloc_id, occupancy | ||
| ) | ||
| atoms = atoms[..., filter] | ||
| atom_id = atom_id[filter] if atom_id is not None else None | ||
| elif altloc == "first": | ||
| filter = struc.filter_first_altloc(atoms, altloc_id) | ||
| atoms = atoms[..., filter] | ||
| atom_id = atom_id[filter] if atom_id is not None else None | ||
| elif altloc == "all": | ||
| atoms.set_annotation("altloc_id", altloc_id) | ||
| else: | ||
| raise ValueError(f"'{altloc}' is not a valid 'altloc' option") | ||
| if include_bonds: | ||
| bond_list = struc.BondList( | ||
| atoms.array_length(), self._pdb_file.parse_bonds(atom_id) | ||
| ) | ||
| bond_list = bond_list.merge(struc.connect_via_residue_names( | ||
| atoms, | ||
| # The information for non-hetero residues and water | ||
| # are not part of CONECT records | ||
| (~atoms.hetero) | struc.filter_solvent(atoms) | ||
| )) | ||
| # Remove bond order from inter residue bonds for consistency | ||
| bond_list.remove_bond_order() | ||
| atoms.bonds = bond_list | ||
| return atoms | ||
| def set_structure(self, atoms): | ||
| """ | ||
| Set the :class:`AtomArray` or :class:`AtomArrayStack` for the | ||
| file. | ||
| This makes also use of the optional annotation arrays | ||
| ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``. | ||
| If the atom array (stack) contains the annotation ``'atom_id'``, | ||
| these values will be used for atom numbering instead of | ||
| continuous numbering. | ||
| Parameters | ||
| ---------- | ||
| array : AtomArray or AtomArrayStack | ||
| The array or stack to be saved into this file. If a stack | ||
| is given, each array in the stack is saved as separate | ||
| model. | ||
| Notes | ||
| ----- | ||
| If `array` has an associated :class:`BondList`, ``CONECT`` | ||
| records are also written for all non-water hetero residues | ||
| and all inter-residue connections. | ||
| """ | ||
| # Reset lines of text | ||
| self._pdb_file = RustPDBFile([]) | ||
| # Write 'CRYST1' record | ||
| if atoms.box is not None: | ||
| box = atoms.box | ||
| if box.ndim == 3: | ||
| box = box[0] | ||
| len_a, len_b, len_c, alpha, beta, gamma \ | ||
| = struc.unitcell_from_vectors(box) | ||
| self._pdb_file.write_box( | ||
| len_a, len_b, len_c, | ||
| np.rad2deg(alpha), np.rad2deg(beta), np.rad2deg(gamma) | ||
| ) | ||
| # Write 'ATOM' and 'MODEL' records | ||
| # Convert Unicode arrays into uint32 arrays for usage in Rust | ||
| chain_id = np.frombuffer(atoms.chain_id, dtype=np.uint32).reshape(-1, 4) | ||
| ins_code = np.frombuffer(atoms.ins_code, dtype=np.uint32).reshape(-1, 1) | ||
| res_name = np.frombuffer(atoms.res_name, dtype=np.uint32).reshape(-1, 3) | ||
| atom_name = np.frombuffer(atoms.atom_name, dtype=np.uint32).reshape(-1, 6) | ||
| element = np.frombuffer(atoms.element, dtype=np.uint32).reshape(-1, 2) | ||
| categories = atoms.get_annotation_categories() | ||
| atom_id = atoms.atom_id if "atom_id" in categories else None | ||
| b_factor = atoms.b_factor if "b_factor" in categories else None | ||
| occupancy = atoms.occupancy if "occupancy" in categories else None | ||
| charge = atoms.charge if "charge" in categories else None | ||
| if isinstance(atoms, struc.AtomArray): | ||
| self._pdb_file.write_single_model( | ||
| atoms.coord, chain_id, atoms.res_id, ins_code, | ||
| res_name, atoms.hetero, atom_name, element, | ||
| atom_id, b_factor, occupancy, charge | ||
| ) | ||
| elif isinstance(atoms, struc.AtomArrayStack): | ||
| self._pdb_file.write_multi_model( | ||
| atoms.coord, chain_id, atoms.res_id, ins_code, | ||
| res_name, atoms.hetero, atom_name, element, | ||
| atom_id, b_factor, occupancy, charge | ||
| ) | ||
| else: | ||
| raise TypeError( | ||
| f"Expected AtomArray or AtomArrayStack, " | ||
| f"but got {type(atoms).__name__}" | ||
| ) | ||
| # Write 'CONECT' records | ||
| if atoms.bonds is not None: | ||
| # Only non-water hetero records and connections between | ||
| # residues are added to the records | ||
| hetero_indices = np.where(atoms.hetero & ~struc.filter_solvent(atoms))[0] | ||
| bond_array = atoms.bonds.as_array() | ||
| bond_array = bond_array[ | ||
| np.isin(bond_array[:,0], hetero_indices) | | ||
| np.isin(bond_array[:,1], hetero_indices) | | ||
| (atoms.res_id [bond_array[:,0]] != atoms.res_id [bond_array[:,1]]) | | ||
| (atoms.chain_id[bond_array[:,0]] != atoms.chain_id[bond_array[:,1]]) | ||
| ] | ||
| # Bond type is unused since PDB does not support bond orders | ||
| bonds, _ = struc.BondList( | ||
| atoms.array_length(), bond_array | ||
| ).get_all_bonds() | ||
| atom_id = np.arange(1, atoms.array_length()+1) if atom_id is None else atom_id | ||
| self._pdb_file.write_bonds( | ||
| bonds, atom_id | ||
| ) | ||
| self.lines = self._pdb_file.lines |
-8
| import fastpdb | ||
| in_file = fastpdb.PDBFile.read("1AKI.pdb") | ||
| atom_array = in_file.get_structure(model=1) | ||
| out_file = fastpdb.PDBFile() | ||
| out_file.set_structure(atom_array) | ||
| out_file.write("test.pdb") |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
13273169
0.03%30
3.45%586
1.38%