You're Invited:Meet the Socket Team at RSAC and BSidesSF 2026, March 23–26.RSVP
Socket
Book a DemoSign in
Socket

fastpdb

Package Overview
Dependencies
Maintainers
2
Versions
9
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

fastpdb - pypi Package Compare versions

Comparing version
0.1.0
to
1.0.0
+76
.github/workflows/build.yml
name: Builds
on:
release:
types: [published]
workflow_dispatch: {}
jobs:
build:
name: Building distribution
strategy:
matrix:
include:
- os: ubuntu-latest
python: '3.7'
numpy: '1.15'
source: false
- os: ubuntu-latest
python: '3.8'
numpy: '1.16'
source: false
- os: ubuntu-latest
python: '3.9'
numpy: '1.19'
source: true
- os: macos-latest
python: '3.7'
numpy: '1.15'
source: false
- os: macos-latest
python: '3.8'
numpy: '1.16'
source: false
- os: macos-latest
python: '3.9'
numpy: '1.19'
source: false
- os: windows-latest
python: '3.7'
numpy: '1.15'
source: false
- os: windows-latest
python: '3.8'
numpy: '1.16'
source: false
- os: windows-latest
python: '3.9'
numpy: '1.19'
source: false
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash -l {0}
steps:
- uses: actions/checkout@v2
- uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: fastpdb-dev
auto-update-conda: true
python-version: ${{ matrix.python }}
- name: Installing dependencies
run: conda install -c conda-forge numpy=$NUMPY_VERSION maturin
env:
NUMPY_VERSION: ${{ matrix.numpy }}
- if: ${{ !matrix.source }}
name: Building distribution
run: maturin build --release --no-sdist -i python -o dist
- if: ${{ matrix.source }}
name: Building distribution
run: maturin build --release -i python -o dist
- uses: actions/upload-artifact@v2
with:
name: fastpdb distribution
path: dist//*
if-no-files-found: error
name: Tests
on: [push, pull_request]
jobs:
test-simple:
name: Testing
strategy:
matrix:
include:
- os: ubuntu-18.04
python: '3.7'
numpy: '1.15'
- os: ubuntu-18.04
python: '3.8'
numpy: '1.16'
- os: ubuntu-18.04
python: '3.9'
numpy: '1.19'
- os: macos-latest
python: '3.7'
numpy: '1.15'
- os: macos-latest
python: '3.8'
numpy: '1.16'
- os: macos-latest
python: '3.9'
numpy: '1.19'
- os: windows-latest
python: '3.7'
numpy: '1.15'
- os: windows-latest
python: '3.8'
numpy: '1.16'
- os: windows-latest
python: '3.9'
numpy: '1.19'
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash -l {0}
steps:
- uses: actions/checkout@v2
- uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: fastpdb-test
auto-update-conda: true
python-version: ${{ matrix.python }}
- name: Installing dependencies
run: conda install -c conda-forge numpy=$NUMPY_VERSION "biotite>=0.29" maturin pytest
env:
NUMPY_VERSION: ${{ matrix.numpy }}
- name: Building distribution
run: maturin build --release -i python -o dist
- name: Installing distribution
run: pip install .//dist//*.whl
- name: Testing code
run: pytest --assert=plain
__name__ = "fastpdb"
__author__ = "Patrick Kunzmann"
__all__ = ["PDBFile"]
__version__ = "1.0.0"
import numpy as np
import biotite
import biotite.structure as struc
import biotite.structure.io.pdb as pdb
from .fastpdb import PDBFile as RustPDBFile
class PDBFile(biotite.TextFile):
r"""
This class represents a PDB file.
This class only provides support for reading/writing the pure atom
information (``ATOM``, ``HETATM``, ``MODEL`` and ``ENDMDL``
records).
``TER`` records cannot be written.
See also
--------
PDBxFile
Examples
--------
Load a ``\\*.pdb`` file, modify the structure and save the new
structure into a new file:
>>> import os.path
>>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
>>> array_stack = file.get_structure()
>>> array_stack_mod = rotate(array_stack, [1,2,3])
>>> file = PDBFile()
>>> file.set_structure(array_stack_mod)
>>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb"))
"""
def __init__(self):
super().__init__()
self._pdb_file = RustPDBFile([])
@classmethod
def read(cls, file):
file = super().read(file)
file._pdb_file = RustPDBFile(file.lines)
return file
def get_model_count(self):
"""
Get the number of models contained in the PDB file.
Returns
-------
model_count : int
The number of models.
"""
return self._pdb_file.get_model_count()
def get_coord(self, model=None):
"""
Get only the coordinates of the PDB file.
Parameters
----------
model : int, optional
If this parameter is given, the function will return a
2D coordinate array from the atoms corresponding to the
given model number (starting at 1).
Negative values are used to index models starting from the
last model insted of the first model.
If this parameter is omitted, an 2D coordinate array
containing all models will be returned, even if
the structure contains only one model.
Returns
-------
coord : ndarray, shape=(m,n,3) or shape=(n,2), dtype=float
The coordinates read from the ``ATOM`` and ``HETATM``
records of the file.
Notes
-----
Note that :func:`get_coord()` may output more coordinates than
the atom array (stack) from the corresponding
:func:`get_structure()` call has.
The reason for this is, that :func:`get_structure()` filters
*altloc* IDs, while `get_coord()` does not.
Examples
--------
Read an :class:`AtomArrayStack` from multiple PDB files, where
each PDB file contains the same atoms but different positions.
This is an efficient approach when a trajectory is spread into
multiple PDB files, as done e.g. by the *Rosetta* modeling
software.
For the purpose of this example, the PDB files are created from
an existing :class:`AtomArrayStack`.
>>> import os.path
>>> from tempfile import gettempdir
>>> file_names = []
>>> for i in range(atom_array_stack.stack_depth()):
... pdb_file = PDBFile()
... pdb_file.set_structure(atom_array_stack[i])
... file_name = os.path.join(gettempdir(), f"model_{i+1}.pdb")
... pdb_file.write(file_name)
... file_names.append(file_name)
>>> print(file_names)
['...model_1.pdb', '...model_2.pdb', ..., '...model_38.pdb']
Now the PDB files are used to create an :class:`AtomArrayStack`,
where each model represents a different model.
Construct a new :class:`AtomArrayStack` with annotations taken
from one of the created files used as template and coordinates
from all of the PDB files.
>>> template_file = PDBFile.read(file_names[0])
>>> template = template_file.get_structure()
>>> coord = []
>>> for i, file_name in enumerate(file_names):
... pdb_file = PDBFile.read(file_name)
... coord.append(pdb_file.get_coord(model=1))
>>> new_stack = from_template(template, np.array(coord))
The newly created :class:`AtomArrayStack` should now be equal to
the :class:`AtomArrayStack` the PDB files were created from.
>>> print(np.allclose(new_stack.coord, atom_array_stack.coord))
True
"""
if model is None:
coord = self._pdb_file.parse_coord_multi_model()
else:
coord = self._pdb_file.parse_coord_single_model(model)
return coord
def get_structure(self, model=None, altloc="first", extra_fields=None, include_bonds=False):
"""
Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file.
Parameters
----------
model : int, optional
If this parameter is given, the function will return an
:class:`AtomArray` from the atoms corresponding to the given
model number (starting at 1).
Negative values are used to index models starting from the
last model insted of the first model.
If this parameter is omitted, an :class:`AtomArrayStack`
containing all models will be returned, even if the
structure contains only one model.
altloc : {'first', 'occupancy', 'all'}
This parameter defines how *altloc* IDs are handled:
- ``'first'`` - Use atoms that have the first
*altloc* ID appearing in a residue.
- ``'occupancy'`` - Use atoms that have the *altloc* ID
with the highest occupancy for a residue.
- ``'all'`` - Use all atoms.
Note that this leads to duplicate atoms.
When this option is chosen, the ``altloc_id``
annotation array is added to the returned structure.
extra_fields : list of str, optional
The strings in the list are optional annotation categories
that should be stored in the output array or stack.
These are valid values:
``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
``'charge'``.
include_bonds : bool, optional
If set to true, a :class:`BondList` will be created for the
resulting :class:`AtomArray` containing the bond information
from the file.
All bonds have :attr:`BondType.ANY`, since the PDB format
does not support bond orders.
Returns
-------
array : AtomArray or AtomArrayStack
The return type depends on the `model` parameter.
"""
if extra_fields is not None:
include_atom_id = "atom_id" in extra_fields
include_b_factor = "b_factor" in extra_fields
include_occupancy = "occupancy" in extra_fields
include_charge = "charge" in extra_fields
else:
include_atom_id = False
include_b_factor = False
include_occupancy = False
include_charge = False
if include_bonds:
# Required for mapping the bonded atom IDs to atom indices
include_atom_id = True
if altloc == "occupancy":
include_occupancy = True
if model is None:
coord = self._pdb_file.parse_coord_multi_model()
annotations = self._pdb_file.parse_annotations(
1,
include_atom_id, include_b_factor,
include_occupancy, include_charge
)
else:
coord = self._pdb_file.parse_coord_single_model(model)
annotations = self._pdb_file.parse_annotations(
model,
include_atom_id, include_b_factor,
include_occupancy, include_charge
)
(
chain_id, res_id, ins_code, res_name,
hetero, atom_name, element, altloc_id,
atom_id, b_factor, occupancy, charge
) = annotations
# Interpret uint32 arrays as unicode arrays
chain_id = np.frombuffer(chain_id, dtype="U4")
ins_code = np.frombuffer(ins_code, dtype="U1")
res_name = np.frombuffer(res_name, dtype="U3")
atom_name = np.frombuffer(atom_name, dtype="U6")
element = np.frombuffer(element, dtype="U2")
altloc_id = np.frombuffer(altloc_id, dtype="U1")
if coord.ndim == 3:
atoms = struc.AtomArrayStack(coord.shape[0], coord.shape[1])
atoms.coord = coord
else:
atoms = struc.AtomArray(coord.shape[0])
atoms.coord = coord
atoms.chain_id = chain_id
atoms.res_id = res_id
atoms.ins_code = ins_code
atoms.res_name = res_name
atoms.hetero = hetero
atoms.atom_name = atom_name
atoms.element = element
for field in (extra_fields if extra_fields is not None else []):
if field == "atom_id":
# Copy is necessary to avoid double masking in
# later altloc ID filtering
atoms.set_annotation("atom_id", atom_id.copy())
elif field == "charge":
atoms.set_annotation("charge", charge)
elif field == "occupancy":
atoms.set_annotation("occupancy", occupancy)
elif field == "b_factor":
atoms.set_annotation("b_factor", b_factor)
else:
raise ValueError(f"Unknown extra field: {field}")
box = self._pdb_file.parse_box()
if box is None:
atoms.box = None
else:
len_a, len_b, len_c, alpha, beta, gamma = box
box = struc.vectors_from_unitcell(
len_a, len_b, len_c,
np.deg2rad(alpha), np.deg2rad(beta), np.deg2rad(gamma)
)
if isinstance(atoms, struc.AtomArray):
atoms.box = box
else:
atoms.box = np.repeat(
box[np.newaxis, ...], atoms.stack_depth(), axis=0
)
# Filter altloc IDs
if altloc == "occupancy":
filter = struc.filter_highest_occupancy_altloc(
atoms, altloc_id, occupancy
)
atoms = atoms[..., filter]
atom_id = atom_id[filter] if atom_id is not None else None
elif altloc == "first":
filter = struc.filter_first_altloc(atoms, altloc_id)
atoms = atoms[..., filter]
atom_id = atom_id[filter] if atom_id is not None else None
elif altloc == "all":
atoms.set_annotation("altloc_id", altloc_id)
else:
raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
if include_bonds:
bond_list = struc.BondList(
atoms.array_length(), self._pdb_file.parse_bonds(atom_id)
)
bond_list = bond_list.merge(struc.connect_via_residue_names(
atoms,
# The information for non-hetero residues and water
# are not part of CONECT records
(~atoms.hetero) | struc.filter_solvent(atoms)
))
# Remove bond order from inter residue bonds for consistency
bond_list.remove_bond_order()
atoms.bonds = bond_list
return atoms
def set_structure(self, atoms):
"""
Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
file.
This makes also use of the optional annotation arrays
``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
If the atom array (stack) contains the annotation ``'atom_id'``,
these values will be used for atom numbering instead of
continuous numbering.
Parameters
----------
array : AtomArray or AtomArrayStack
The array or stack to be saved into this file. If a stack
is given, each array in the stack is saved as separate
model.
Notes
-----
If `array` has an associated :class:`BondList`, ``CONECT``
records are also written for all non-water hetero residues
and all inter-residue connections.
"""
# Reset lines of text
self._pdb_file = RustPDBFile([])
# Write 'CRYST1' record
if atoms.box is not None:
box = atoms.box
if box.ndim == 3:
box = box[0]
len_a, len_b, len_c, alpha, beta, gamma \
= struc.unitcell_from_vectors(box)
self._pdb_file.write_box(
len_a, len_b, len_c,
np.rad2deg(alpha), np.rad2deg(beta), np.rad2deg(gamma)
)
# Write 'ATOM' and 'MODEL' records
# Convert Unicode arrays into uint32 arrays for usage in Rust
chain_id = np.frombuffer(atoms.chain_id, dtype=np.uint32).reshape(-1, 4)
ins_code = np.frombuffer(atoms.ins_code, dtype=np.uint32).reshape(-1, 1)
res_name = np.frombuffer(atoms.res_name, dtype=np.uint32).reshape(-1, 3)
atom_name = np.frombuffer(atoms.atom_name, dtype=np.uint32).reshape(-1, 6)
element = np.frombuffer(atoms.element, dtype=np.uint32).reshape(-1, 2)
categories = atoms.get_annotation_categories()
atom_id = atoms.atom_id if "atom_id" in categories else None
b_factor = atoms.b_factor if "b_factor" in categories else None
occupancy = atoms.occupancy if "occupancy" in categories else None
charge = atoms.charge if "charge" in categories else None
# Convert to correct dtype for Rust function call, if necessary
coord = atoms.coord.astype(np.float32, copy=False)
res_id = atoms.res_id.astype(np.int64, copy=False)
hetero = atoms.hetero.astype(bool, copy=False)
if atom_id is not None:
atom_id = atom_id.astype(np.int64, copy=False)
if b_factor is not None:
b_factor = b_factor.astype(np.float64, copy=False)
if occupancy is not None:
occupancy = occupancy.astype(np.float64, copy=False)
if charge is not None:
charge = charge.astype(np.int64, copy=False)
if isinstance(atoms, struc.AtomArray):
self._pdb_file.write_single_model(
coord, chain_id, res_id, ins_code,
res_name, hetero, atom_name, element,
atom_id, b_factor, occupancy, charge
)
elif isinstance(atoms, struc.AtomArrayStack):
self._pdb_file.write_multi_model(
coord, chain_id, res_id, ins_code,
res_name, hetero, atom_name, element,
atom_id, b_factor, occupancy, charge
)
else:
raise TypeError(
f"Expected AtomArray or AtomArrayStack, "
f"but got {type(atoms).__name__}"
)
# Write 'CONECT' records
if atoms.bonds is not None:
# Only non-water hetero records and connections between
# residues are added to the records
hetero_indices = np.where(atoms.hetero & ~struc.filter_solvent(atoms))[0]
bond_array = atoms.bonds.as_array()
bond_array = bond_array[
np.isin(bond_array[:,0], hetero_indices) |
np.isin(bond_array[:,1], hetero_indices) |
(atoms.res_id [bond_array[:,0]] != atoms.res_id [bond_array[:,1]]) |
(atoms.chain_id[bond_array[:,0]] != atoms.chain_id[bond_array[:,1]])
]
# Bond type is unused since PDB does not support bond orders
bonds, _ = struc.BondList(
atoms.array_length(), bond_array
).get_all_bonds()
atom_id = np.arange(1, atoms.array_length()+1, dtype=np.int64) \
if atom_id is None else atom_id
self._pdb_file.write_bonds(
bonds.astype(np.int32, copy=False), atom_id
)
self.lines = self._pdb_file.lines
+1
-3

@@ -23,6 +23,4 @@ import time

pdb_file_path = rcsb.fetch(PDB_ID, "pdb", tempfile.gettempdir())
pdb_file_path = rcsb.fetch(PDB_ID, "pdb", ".")
#pdb_file_path = rcsb.fetch(PDB_ID, "pdb", tempfile.gettempdir())
fastpdb_runtimes = {}

@@ -29,0 +27,0 @@ biotite_runtimes = {}

@@ -9,3 +9,3 @@ <?xml version="1.0" encoding="utf-8" standalone="no"?>

<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
<dc:date>2021-10-06T16:47:02.199004</dc:date>
<dc:date>2021-10-07T18:07:40.322632</dc:date>
<dc:format>image/svg+xml</dc:format>

@@ -42,6 +42,6 @@ <dc:creator>

<g id="patch_3">
<path clip-path="url(#p7e47fec0d4)" d="M 83.987045 256.16
<path clip-path="url(#p3f95facf3d)" d="M 83.987045 256.16
L 129.611136 256.16
L 129.611136 184.500366
L 83.987045 184.500366
L 129.611136 176.801224
L 83.987045 176.801224
z

@@ -51,6 +51,6 @@ " style="fill:#0a6efd;stroke:#000000;stroke-linejoin:miter;"/>

<g id="patch_4">
<path clip-path="url(#p7e47fec0d4)" d="M 266.483409 256.16
<path clip-path="url(#p3f95facf3d)" d="M 266.483409 256.16
L 312.1075 256.16
L 312.1075 149.988014
L 266.483409 149.988014
L 312.1075 128.837933
L 266.483409 128.837933
z

@@ -60,3 +60,3 @@ " style="fill:#0a6efd;stroke:#000000;stroke-linejoin:miter;"/>

<g id="patch_5">
<path clip-path="url(#p7e47fec0d4)" d="M 448.979773 256.16
<path clip-path="url(#p3f95facf3d)" d="M 448.979773 256.16
L 494.603864 256.16

@@ -69,6 +69,6 @@ L 494.603864 35.069091

<g id="patch_6">
<path clip-path="url(#p7e47fec0d4)" d="M 129.611136 256.16
<path clip-path="url(#p3f95facf3d)" d="M 129.611136 256.16
L 175.235227 256.16
L 175.235227 236.952723
L 129.611136 236.952723
L 175.235227 235.507445
L 129.611136 235.507445
z

@@ -78,6 +78,6 @@ " style="fill:#e1301d;stroke:#000000;stroke-linejoin:miter;"/>

<g id="patch_7">
<path clip-path="url(#p7e47fec0d4)" d="M 312.1075 256.16
<path clip-path="url(#p3f95facf3d)" d="M 312.1075 256.16
L 357.731591 256.16
L 357.731591 236.952723
L 312.1075 236.952723
L 357.731591 235.507445
L 312.1075 235.507445
z

@@ -87,6 +87,6 @@ " style="fill:#e1301d;stroke:#000000;stroke-linejoin:miter;"/>

<g id="patch_8">
<path clip-path="url(#p7e47fec0d4)" d="M 494.603864 256.16
<path clip-path="url(#p3f95facf3d)" d="M 494.603864 256.16
L 540.227955 256.16
L 540.227955 236.952723
L 494.603864 236.952723
L 540.227955 235.507445
L 494.603864 235.507445
z

@@ -101,6 +101,6 @@ " style="fill:#e1301d;stroke:#000000;stroke-linejoin:miter;"/>

L 0 3.5
" id="m1acca59ad8" style="stroke:#000000;stroke-width:0.8;"/>
" id="m37efa55f1f" style="stroke:#000000;stroke-width:0.8;"/>
</defs>
<g>
<use style="stroke:#000000;stroke-width:0.8;" x="129.611136" xlink:href="#m1acca59ad8" y="256.16"/>
<use style="stroke:#000000;stroke-width:0.8;" x="129.611136" xlink:href="#m37efa55f1f" y="256.16"/>
</g>

@@ -301,3 +301,3 @@ </g>

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="312.1075" xlink:href="#m1acca59ad8" y="256.16"/>
<use style="stroke:#000000;stroke-width:0.8;" x="312.1075" xlink:href="#m37efa55f1f" y="256.16"/>
</g>

@@ -363,3 +363,3 @@ </g>

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="494.603864" xlink:href="#m1acca59ad8" y="256.16"/>
<use style="stroke:#000000;stroke-width:0.8;" x="494.603864" xlink:href="#m37efa55f1f" y="256.16"/>
</g>

@@ -443,6 +443,6 @@ </g>

L -3.5 0
" id="me6d8a45849" style="stroke:#000000;stroke-width:0.8;"/>
" id="m7fcad94283" style="stroke:#000000;stroke-width:0.8;"/>
</defs>
<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="236.952723"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="235.507445"/>
</g>

@@ -452,3 +452,3 @@ </g>

<!-- 1× -->
<g transform="translate(36.484375 241.511786)scale(0.12 -0.12)">
<g transform="translate(36.484375 240.066507)scale(0.12 -0.12)">
<defs>

@@ -493,3 +493,3 @@ <path d="M 794 531

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="217.745446"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="214.854889"/>
</g>

@@ -499,3 +499,3 @@ </g>

<!-- 2× -->
<g transform="translate(36.484375 222.304509)scale(0.12 -0.12)">
<g transform="translate(36.484375 219.413952)scale(0.12 -0.12)">
<defs>

@@ -535,3 +535,3 @@ <path d="M 1228 531

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="198.53817"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="194.202334"/>
</g>

@@ -541,3 +541,3 @@ </g>

<!-- 3× -->
<g transform="translate(36.484375 203.097232)scale(0.12 -0.12)">
<g transform="translate(36.484375 198.761396)scale(0.12 -0.12)">
<defs>

@@ -585,3 +585,3 @@ <path d="M 2597 2516

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="179.330893"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="173.549778"/>
</g>

@@ -591,3 +591,3 @@ </g>

<!-- 4× -->
<g transform="translate(36.484375 183.889955)scale(0.12 -0.12)">
<g transform="translate(36.484375 178.108841)scale(0.12 -0.12)">
<defs>

@@ -622,3 +622,3 @@ <path d="M 2419 4116

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="160.123616"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="152.897223"/>
</g>

@@ -628,3 +628,3 @@ </g>

<!-- 5× -->
<g transform="translate(36.484375 164.682679)scale(0.12 -0.12)">
<g transform="translate(36.484375 157.456285)scale(0.12 -0.12)">
<defs>

@@ -665,3 +665,3 @@ <path d="M 691 4666

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="140.916339"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="132.244667"/>
</g>

@@ -671,3 +671,3 @@ </g>

<!-- 6× -->
<g transform="translate(36.484375 145.475402)scale(0.12 -0.12)">
<g transform="translate(36.484375 136.80373)scale(0.12 -0.12)">
<defs>

@@ -713,3 +713,3 @@ <path d="M 2113 2584

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="121.709063"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="111.592112"/>
</g>

@@ -719,3 +719,3 @@ </g>

<!-- 7× -->
<g transform="translate(36.484375 126.268125)scale(0.12 -0.12)">
<g transform="translate(36.484375 116.151174)scale(0.12 -0.12)">
<defs>

@@ -741,3 +741,3 @@ <path d="M 525 4666

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="102.501786"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="90.939556"/>
</g>

@@ -747,3 +747,3 @@ </g>

<!-- 8× -->
<g transform="translate(36.484375 107.060848)scale(0.12 -0.12)">
<g transform="translate(36.484375 95.498619)scale(0.12 -0.12)">
<defs>

@@ -798,3 +798,3 @@ <path d="M 2034 2216

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="83.294509"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="70.287001"/>
</g>

@@ -804,3 +804,3 @@ </g>

<!-- 9× -->
<g transform="translate(36.484375 87.853572)scale(0.12 -0.12)">
<g transform="translate(36.484375 74.846063)scale(0.12 -0.12)">
<defs>

@@ -846,3 +846,3 @@ <path d="M 703 97

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="64.087232"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="49.634445"/>
</g>

@@ -852,3 +852,3 @@ </g>

<!-- 10× -->
<g transform="translate(28.849375 68.646295)scale(0.12 -0.12)">
<g transform="translate(28.849375 54.193508)scale(0.12 -0.12)">
<defs>

@@ -886,3 +886,3 @@ <path d="M 2034 4250

<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="44.879956"/>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="28.98189"/>
</g>

@@ -892,3 +892,3 @@ </g>

<!-- 11× -->
<g transform="translate(28.849375 49.439018)scale(0.12 -0.12)">
<g transform="translate(28.849375 33.540952)scale(0.12 -0.12)">
<use xlink:href="#DejaVuSans-31"/>

@@ -900,18 +900,3 @@ <use x="63.623047" xlink:href="#DejaVuSans-31"/>

</g>
<g id="ytick_12">
<g id="line2d_15">
<g>
<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="25.672679"/>
</g>
</g>
<g id="text_15">
<!-- 12× -->
<g transform="translate(28.849375 30.231741)scale(0.12 -0.12)">
<use xlink:href="#DejaVuSans-31"/>
<use x="63.623047" xlink:href="#DejaVuSans-32"/>
<use x="127.246094" xlink:href="#DejaVuSans-d7"/>
</g>
</g>
</g>
<g id="text_16">
<g id="text_15">
<!-- Speedup -->

@@ -1030,5 +1015,5 @@ <g transform="translate(22.35375 160.9825)rotate(-90)scale(0.12 -0.12)">

</g>
<g id="text_17">
<!-- 3.7× -->
<g transform="translate(92.229403 179.004741)scale(0.12 -0.12)">
<g id="text_16">
<!-- 3.8× -->
<g transform="translate(92.229403 171.305599)scale(0.12 -0.12)">
<defs>

@@ -1045,22 +1030,22 @@ <path d="M 684 794

<use x="63.623047" xlink:href="#DejaVuSans-2e"/>
<use x="95.410156" xlink:href="#DejaVuSans-37"/>
<use x="95.410156" xlink:href="#DejaVuSans-38"/>
<use x="159.033203" xlink:href="#DejaVuSans-d7"/>
</g>
</g>
<g id="text_18">
<!-- 5.5× -->
<g transform="translate(274.725767 144.492389)scale(0.12 -0.12)">
<use xlink:href="#DejaVuSans-35"/>
<g id="text_17">
<!-- 6.2× -->
<g transform="translate(274.725767 123.342308)scale(0.12 -0.12)">
<use xlink:href="#DejaVuSans-36"/>
<use x="63.623047" xlink:href="#DejaVuSans-2e"/>
<use x="95.410156" xlink:href="#DejaVuSans-35"/>
<use x="95.410156" xlink:href="#DejaVuSans-32"/>
<use x="159.033203" xlink:href="#DejaVuSans-d7"/>
</g>
</g>
<g id="text_19">
<!-- 11.5× -->
<g id="text_18">
<!-- 10.7× -->
<g transform="translate(453.404631 29.573466)scale(0.12 -0.12)">
<use xlink:href="#DejaVuSans-31"/>
<use x="63.623047" xlink:href="#DejaVuSans-31"/>
<use x="63.623047" xlink:href="#DejaVuSans-30"/>
<use x="127.246094" xlink:href="#DejaVuSans-2e"/>
<use x="159.033203" xlink:href="#DejaVuSans-35"/>
<use x="159.033203" xlink:href="#DejaVuSans-37"/>
<use x="222.65625" xlink:href="#DejaVuSans-d7"/>

@@ -1078,3 +1063,3 @@ </g>

</g>
<g id="text_20">
<g id="text_19">
<!-- fastpdb -->

@@ -1179,3 +1164,3 @@ <g transform="translate(105.575 32.878125)scale(0.12 -0.12)">

</g>
<g id="text_21">
<g id="text_20">
<!-- biotite -->

@@ -1196,3 +1181,3 @@ <g transform="translate(105.575 50.491875)scale(0.12 -0.12)">

<defs>
<clipPath id="p7e47fec0d4">
<clipPath id="p3f95facf3d">
<rect height="243.2" width="501.865" x="61.175" y="12.96"/>

@@ -1199,0 +1184,0 @@ </clipPath>

[package]
name = "fastpdb"
version = "0.1.0"
version = "1.0.0"
edition = "2018"
[package.metadata.maturin]
python-source = "python-src"
[dependencies]

@@ -7,0 +10,0 @@ numpy = "0.14"

Metadata-Version: 2.1
Name: fastpdb
Version: 0.1.0
Classifier: Development Status :: 1 - Planning
Version: 1.0.0
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Science/Research

@@ -50,6 +50,6 @@ Classifier: License :: OSI Approved :: BSD License

Description-Content-Type: text/x-rst; charset=UTF-8
Project-URL: homepage, https://github.com/biotite-dev/fastpdb
Project-URL: repository, https://github.com/biotite-dev/fastpdb
Project-URL: homepage, https://github.com/biotite-dev/fastpdb
.. image:: logo.svg
.. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/logo.svg
:width: 300

@@ -100,5 +100,5 @@ :align: center

.. image:: benchmark.svg
.. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/benchmark.svg
:width: 800
:align: center
[project]
name = "fastpdb"
version = "0.1.0"
version = "1.0.0"
description = "A high performance drop-in replacement for Biotite's PDBFile."

@@ -16,3 +16,3 @@ readme = "README.rst"

classifiers = [
"Development Status :: 1 - Planning",
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Science/Research",

@@ -19,0 +19,0 @@ "License :: OSI Approved :: BSD License",

@@ -1,2 +0,2 @@

.. image:: logo.svg
.. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/logo.svg
:width: 300

@@ -47,4 +47,4 @@ :align: center

.. image:: benchmark.svg
.. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/benchmark.svg
:width: 800
:align: center

@@ -6,2 +6,3 @@ //! Low-level PDB file parsing and writing.

use std::collections::HashMap;
use std::cmp::Ordering;
use ndarray::{Array, Ix1, Ix2, Ix3};

@@ -52,3 +53,3 @@ use pyo3::prelude::*;

fn get_model_count(&self) -> usize {
return self.get_model_start_indices().len()
self.get_model_start_indices().len()
}

@@ -195,3 +196,3 @@

write_string_to_array(&mut res_name, atom_i, line[17..20].trim());
hetero[atom_i] = if &line[0..4] == "ATOM" { false } else { true };
hetero[atom_i] = !(&line[0..4] == "ATOM");
write_string_to_array(&mut atom_name, atom_i, line[12..16].trim());

@@ -220,3 +221,3 @@ write_string_to_array(&mut element, atom_i, line[76..78].trim());

else {
number = raw_number.to_digit(10).ok_or(
number = raw_number.to_digit(10).ok_or_else( ||
BadStructureError::new_err(format!(

@@ -359,11 +360,7 @@ "'{}' cannot be parsed into a number", raw_number

let c = arr[i];
if c > 0 {
format!("{:1}+", c)
match c.cmp(&0) {
Ordering::Greater => format!("{:1}+", c),
Ordering::Less => format!("{:1}-", -c),
Ordering::Equal => String::from(" ")
}
else if c < 0 {
format!("{:1}-", -c)
}
else {
String::from(" ")
}
}),

@@ -430,11 +427,7 @@ ));

let c = arr[i];
if c > 0 {
format!("{:1}+", c)
match c.cmp(&0) {
Ordering::Greater => format!("{:1}+", c),
Ordering::Less => format!("{:1}-", -c),
Ordering::Equal => String::from(" ")
}
else if c < 0 {
format!("{:1}-", -c)
}
else {
String::from(" ")
}
}),

@@ -528,16 +521,16 @@ ));

}
coord[[atom_i, 0]] = line[30..38].trim().parse().or_else(|_|
Err(BadStructureError::new_err(format!(
coord[[atom_i, 0]] = line[30..38].trim().parse().map_err(|_|
BadStructureError::new_err(format!(
"'{}' cannot be parsed into a float", line[30..38].trim()
)))
))
)?;
coord[[atom_i, 1]] = line[38..46].trim().parse().or_else(|_|
Err(BadStructureError::new_err(format!(
coord[[atom_i, 1]] = line[38..46].trim().parse().map_err(|_|
BadStructureError::new_err(format!(
"'{}' cannot be parsed into a float", line[38..46].trim()
)))
))
)?;
coord[[atom_i, 2]] = line[46..54].trim().parse().or_else(|_|
Err(BadStructureError::new_err(format!(
coord[[atom_i, 2]] = line[46..54].trim().parse().map_err(|_|
BadStructureError::new_err(format!(
"'{}' cannot be parsed into a float", line[46..54].trim()
)))
))
)?;

@@ -581,3 +574,3 @@ }

// In these cases model starting index is set to 0
if model_start_i.len() == 0 {
if model_start_i.is_empty() {
model_start_i = vec![0]

@@ -593,13 +586,11 @@ }

model: isize,
model_start_i: &Vec<usize>) -> PyResult<(usize, usize)> {
model_start_i: &[usize]) -> PyResult<(usize, usize)> {
let model_i: isize;
if model > 0 {
model_i = model - 1;
}
else if model < 0 {
model_i = model_start_i.len() as isize + model;
}
else {
return Err(exceptions::PyValueError::new_err("Model index must not be 0"));
}
match model.cmp(&0) {
Ordering::Greater => model_i = model - 1,
Ordering::Less => model_i = model_start_i.len() as isize + model,
Ordering::Equal => return Err(exceptions::PyValueError::new_err(
"Model index must not be 0"
)),
};

@@ -626,4 +617,4 @@ if model_i >= model_start_i.len() as isize || model_i < 0 {

fn get_model_length(&self,
model_start_i: &Vec<usize>,
atom_line_i: &Vec<usize>) -> PyResult<usize> {
model_start_i: &[usize],
atom_line_i: &[usize]) -> PyResult<usize> {
let n_models = model_start_i.len();

@@ -687,6 +678,6 @@ let mut length: Option<usize> = None;

fn parse_number<T: FromStr>(string: &str) -> PyResult<T> {
string.trim().parse().or_else(|_|
Err(BadStructureError::new_err(format!(
string.trim().parse().map_err(|_|
BadStructureError::new_err(format!(
"'{}' cannot be parsed into a number", string.trim()
)))
))
)

@@ -693,0 +684,0 @@ }

__name__ = "fastpdb"
__author__ = "Patrick Kunzmann"
__all__ = ["PDBFile"]
__version__ = "0.1.0"
import numpy as np
import biotite
import biotite.structure as struc
import biotite.structure.io.pdb as pdb
from .fastpdb import PDBFile as RustPDBFile
class PDBFile(biotite.TextFile):
r"""
This class represents a PDB file.
This class only provides support for reading/writing the pure atom
information (*ATOM*, *HETATM*, *MODEL* and *ENDMDL* records). *TER*
records cannot be written.
See also
--------
PDBxFile
Examples
--------
Load a `\\*.pdb` file, modify the structure and save the new
structure into a new file:
>>> import os.path
>>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
>>> array_stack = file.get_structure()
>>> array_stack_mod = rotate(array_stack, [1,2,3])
>>> file = PDBFile()
>>> file.set_structure(array_stack_mod)
>>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb"))
"""
def __init__(self):
super().__init__()
self._pdb_file = RustPDBFile([])
@classmethod
def read(cls, file):
file = super().read(file)
file._pdb_file = RustPDBFile(file.lines)
return file
def get_model_count(self):
"""
Get the number of models contained in the PDB file.
Returns
-------
model_count : int
The number of models.
"""
return self._pdb_file.get_model_count()
def get_coord(self, model=None):
"""
Get only the coordinates of the PDB file.
Parameters
----------
model : int, optional
If this parameter is given, the function will return a
2D coordinate array from the atoms corresponding to the
given model number (starting at 1).
Negative values are used to index models starting from the
last model insted of the first model.
If this parameter is omitted, an 2D coordinate array
containing all models will be returned, even if
the structure contains only one model.
Returns
-------
coord : ndarray, shape=(m,n,3) or shape=(n,2), dtype=float
The coordinates read from the ATOM and HETATM records of the
file.
Notes
-----
Note that :func:`get_coord()` may output more coordinates than
the atom array (stack) from the corresponding
:func:`get_structure()` call has.
The reason for this is, that :func:`get_structure()` filters
*altloc* IDs, while `get_coord()` does not.
Examples
--------
Read an :class:`AtomArrayStack` from multiple PDB files, where
each PDB file contains the same atoms but different positions.
This is an efficient approach when a trajectory is spread into
multiple PDB files, as done e.g. by the *Rosetta* modeling
software.
For the purpose of this example, the PDB files are created from
an existing :class:`AtomArrayStack`.
>>> import os.path
>>> from tempfile import gettempdir
>>> file_names = []
>>> for i in range(atom_array_stack.stack_depth()):
... pdb_file = PDBFile()
... pdb_file.set_structure(atom_array_stack[i])
... file_name = os.path.join(gettempdir(), f"model_{i+1}.pdb")
... pdb_file.write(file_name)
... file_names.append(file_name)
>>> print(file_names)
['...model_1.pdb', '...model_2.pdb', ..., '...model_38.pdb']
Now the PDB files are used to create an :class:`AtomArrayStack`,
where each model represents a different model.
Construct a new :class:`AtomArrayStack` with annotations taken
from one of the created files used as template and coordinates
from all of the PDB files.
>>> template_file = PDBFile.read(file_names[0])
>>> template = template_file.get_structure()
>>> coord = []
>>> for i, file_name in enumerate(file_names):
... pdb_file = PDBFile.read(file_name)
... coord.append(pdb_file.get_coord(model=1))
>>> new_stack = from_template(template, np.array(coord))
The newly created :class:`AtomArrayStack` should now be equal to
the :class:`AtomArrayStack` the PDB files were created from.
>>> print(np.allclose(new_stack.coord, atom_array_stack.coord))
True
"""
if model is None:
coord = self._pdb_file.parse_coord_multi_model()
else:
coord = self._pdb_file.parse_coord_single_model(model)
return coord
def get_structure(self, model=None, altloc="first", extra_fields=None, include_bonds=False):
"""
Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file.
Parameters
----------
model : int, optional
If this parameter is given, the function will return an
:class:`AtomArray` from the atoms corresponding to the given
model number (starting at 1).
Negative values are used to index models starting from the
last model insted of the first model.
If this parameter is omitted, an :class:`AtomArrayStack`
containing all models will be returned, even if the
structure contains only one model.
altloc : {'first', 'occupancy', 'all'}
This parameter defines how *altloc* IDs are handled:
- ``'first'`` - Use atoms that have the first
*altloc* ID appearing in a residue.
- ``'occupancy'`` - Use atoms that have the *altloc* ID
with the highest occupancy for a residue.
- ``'all'`` - Use all atoms.
Note that this leads to duplicate atoms.
When this option is chosen, the ``altloc_id``
annotation array is added to the returned structure.
extra_fields : list of str, optional
The strings in the list are optional annotation categories
that should be stored in the output array or stack.
These are valid values:
``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
``'charge'``.
include_bonds : bool, optional
If set to true, a :class:`BondList` will be created for the
resulting :class:`AtomArray` containing the bond information
from the file.
All bonds have :attr:`BondType.ANY`, since the PDB format
does not support bond orders.
Returns
-------
array : AtomArray or AtomArrayStack
The return type depends on the `model` parameter.
"""
if extra_fields is not None:
include_atom_id = "atom_id" in extra_fields
include_b_factor = "b_factor" in extra_fields
include_occupancy = "occupancy" in extra_fields
include_charge = "charge" in extra_fields
else:
include_atom_id = False
include_b_factor = False
include_occupancy = False
include_charge = False
if include_bonds:
# Required for mapping the bonded atom IDs to atom indices
include_atom_id = True
if altloc == "occupancy":
include_occupancy = True
if model is None:
coord = self._pdb_file.parse_coord_multi_model()
annotations = self._pdb_file.parse_annotations(
1,
include_atom_id, include_b_factor,
include_occupancy, include_charge
)
else:
coord = self._pdb_file.parse_coord_single_model(model)
annotations = self._pdb_file.parse_annotations(
model,
include_atom_id, include_b_factor,
include_occupancy, include_charge
)
(
chain_id, res_id, ins_code, res_name,
hetero, atom_name, element, altloc_id,
atom_id, b_factor, occupancy, charge
) = annotations
# Interpret uint32 arrays as unicode arrays
chain_id = np.frombuffer(chain_id, dtype="U4")
ins_code = np.frombuffer(ins_code, dtype="U1")
res_name = np.frombuffer(res_name, dtype="U3")
atom_name = np.frombuffer(atom_name, dtype="U6")
element = np.frombuffer(element, dtype="U2")
altloc_id = np.frombuffer(altloc_id, dtype="U1")
if coord.ndim == 3:
atoms = struc.AtomArrayStack(coord.shape[0], coord.shape[1])
atoms.coord = coord
else:
atoms = struc.AtomArray(coord.shape[0])
atoms.coord = coord
atoms.chain_id = chain_id
atoms.res_id = res_id
atoms.ins_code = ins_code
atoms.res_name = res_name
atoms.hetero = hetero
atoms.atom_name = atom_name
atoms.element = element
for field in (extra_fields if extra_fields is not None else []):
if field == "atom_id":
# Copy is necessary to avoid double masking in
# later altloc ID filtering
atoms.set_annotation("atom_id", atom_id.copy())
elif field == "charge":
atoms.set_annotation("charge", charge)
elif field == "occupancy":
atoms.set_annotation("occupancy", occupancy)
elif field == "b_factor":
atoms.set_annotation("b_factor", b_factor)
else:
raise ValueError(f"Unknown extra field: {field}")
box = self._pdb_file.parse_box()
if box is None:
atoms.box = None
else:
len_a, len_b, len_c, alpha, beta, gamma = box
box = struc.vectors_from_unitcell(
len_a, len_b, len_c,
np.deg2rad(alpha), np.deg2rad(beta), np.deg2rad(gamma)
)
if isinstance(atoms, struc.AtomArray):
atoms.box = box
else:
atoms.box = np.repeat(
box[np.newaxis, ...], atoms.stack_depth(), axis=0
)
# Filter altloc IDs
if altloc == "occupancy":
filter = struc.filter_highest_occupancy_altloc(
atoms, altloc_id, occupancy
)
atoms = atoms[..., filter]
atom_id = atom_id[filter] if atom_id is not None else None
elif altloc == "first":
filter = struc.filter_first_altloc(atoms, altloc_id)
atoms = atoms[..., filter]
atom_id = atom_id[filter] if atom_id is not None else None
elif altloc == "all":
atoms.set_annotation("altloc_id", altloc_id)
else:
raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
if include_bonds:
bond_list = struc.BondList(
atoms.array_length(), self._pdb_file.parse_bonds(atom_id)
)
bond_list = bond_list.merge(struc.connect_via_residue_names(
atoms,
# The information for non-hetero residues and water
# are not part of CONECT records
(~atoms.hetero) | struc.filter_solvent(atoms)
))
# Remove bond order from inter residue bonds for consistency
bond_list.remove_bond_order()
atoms.bonds = bond_list
return atoms
def set_structure(self, atoms):
"""
Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
file.
This makes also use of the optional annotation arrays
``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
If the atom array (stack) contains the annotation ``'atom_id'``,
these values will be used for atom numbering instead of
continuous numbering.
Parameters
----------
array : AtomArray or AtomArrayStack
The array or stack to be saved into this file. If a stack
is given, each array in the stack is saved as separate
model.
Notes
-----
If `array` has an associated :class:`BondList`, ``CONECT``
records are also written for all non-water hetero residues
and all inter-residue connections.
"""
# Reset lines of text
self._pdb_file = RustPDBFile([])
# Write 'CRYST1' record
if atoms.box is not None:
box = atoms.box
if box.ndim == 3:
box = box[0]
len_a, len_b, len_c, alpha, beta, gamma \
= struc.unitcell_from_vectors(box)
self._pdb_file.write_box(
len_a, len_b, len_c,
np.rad2deg(alpha), np.rad2deg(beta), np.rad2deg(gamma)
)
# Write 'ATOM' and 'MODEL' records
# Convert Unicode arrays into uint32 arrays for usage in Rust
chain_id = np.frombuffer(atoms.chain_id, dtype=np.uint32).reshape(-1, 4)
ins_code = np.frombuffer(atoms.ins_code, dtype=np.uint32).reshape(-1, 1)
res_name = np.frombuffer(atoms.res_name, dtype=np.uint32).reshape(-1, 3)
atom_name = np.frombuffer(atoms.atom_name, dtype=np.uint32).reshape(-1, 6)
element = np.frombuffer(atoms.element, dtype=np.uint32).reshape(-1, 2)
categories = atoms.get_annotation_categories()
atom_id = atoms.atom_id if "atom_id" in categories else None
b_factor = atoms.b_factor if "b_factor" in categories else None
occupancy = atoms.occupancy if "occupancy" in categories else None
charge = atoms.charge if "charge" in categories else None
if isinstance(atoms, struc.AtomArray):
self._pdb_file.write_single_model(
atoms.coord, chain_id, atoms.res_id, ins_code,
res_name, atoms.hetero, atom_name, element,
atom_id, b_factor, occupancy, charge
)
elif isinstance(atoms, struc.AtomArrayStack):
self._pdb_file.write_multi_model(
atoms.coord, chain_id, atoms.res_id, ins_code,
res_name, atoms.hetero, atom_name, element,
atom_id, b_factor, occupancy, charge
)
else:
raise TypeError(
f"Expected AtomArray or AtomArrayStack, "
f"but got {type(atoms).__name__}"
)
# Write 'CONECT' records
if atoms.bonds is not None:
# Only non-water hetero records and connections between
# residues are added to the records
hetero_indices = np.where(atoms.hetero & ~struc.filter_solvent(atoms))[0]
bond_array = atoms.bonds.as_array()
bond_array = bond_array[
np.isin(bond_array[:,0], hetero_indices) |
np.isin(bond_array[:,1], hetero_indices) |
(atoms.res_id [bond_array[:,0]] != atoms.res_id [bond_array[:,1]]) |
(atoms.chain_id[bond_array[:,0]] != atoms.chain_id[bond_array[:,1]])
]
# Bond type is unused since PDB does not support bond orders
bonds, _ = struc.BondList(
atoms.array_length(), bond_array
).get_all_bonds()
atom_id = np.arange(1, atoms.array_length()+1) if atom_id is None else atom_id
self._pdb_file.write_bonds(
bonds, atom_id
)
self.lines = self._pdb_file.lines
import fastpdb
in_file = fastpdb.PDBFile.read("1AKI.pdb")
atom_array = in_file.get_structure(model=1)
out_file = fastpdb.PDBFile()
out_file.set_structure(atom_array)
out_file.write("test.pdb")