fastpdb - PyPI Package Compare versions

+76

.github/workflows/build.yml

		# Builds the fastpdb distributions for every supported OS/Python
		# combination and uploads them as a single workflow artifact.
		name: Builds
		on:
		  release:
		    types: [published]
		  workflow_dispatch: {}

		jobs:
		  build:
		    name: Building distribution
		    strategy:
		      matrix:
		        # Each Python version is paired with one NumPy version to
		        # build against.
		        # `source: true` marks the single job that additionally
		        # produces the source distribution; every other job passes
		        # `--no-sdist` to maturin.
		        include:
		          - os: ubuntu-latest
		            python: '3.7'
		            numpy: '1.15'
		            source: false
		          - os: ubuntu-latest
		            python: '3.8'
		            numpy: '1.16'
		            source: false
		          - os: ubuntu-latest
		            python: '3.9'
		            numpy: '1.19'
		            source: true
		          - os: macos-latest
		            python: '3.7'
		            numpy: '1.15'
		            source: false
		          - os: macos-latest
		            python: '3.8'
		            numpy: '1.16'
		            source: false
		          - os: macos-latest
		            python: '3.9'
		            numpy: '1.19'
		            source: false
		          - os: windows-latest
		            python: '3.7'
		            numpy: '1.15'
		            source: false
		          - os: windows-latest
		            python: '3.8'
		            numpy: '1.16'
		            source: false
		          - os: windows-latest
		            python: '3.9'
		            numpy: '1.19'
		            source: false

		    runs-on: ${{ matrix.os }}
		    defaults:
		      run:
		        # Login shell, so that the Conda environment set up by
		        # setup-miniconda is activated in every `run` step
		        shell: bash -l {0}

		    steps:
		      - uses: actions/checkout@v2
		      - uses: conda-incubator/setup-miniconda@v2
		        with:
		          activate-environment: fastpdb-dev
		          auto-update-conda: true
		          python-version: ${{ matrix.python }}
		      - name: Installing dependencies
		        run: conda install -c conda-forge numpy=$NUMPY_VERSION maturin
		        env:
		          NUMPY_VERSION: ${{ matrix.numpy }}
		      # Wheel only
		      - if: ${{ !matrix.source }}
		        name: Building distribution
		        run: maturin build --release --no-sdist -i python -o dist
		      # Wheel and source distribution
		      - if: ${{ matrix.source }}
		        name: Building distribution
		        run: maturin build --release -i python -o dist
		      - uses: actions/upload-artifact@v2
		        with:
		          name: fastpdb distribution
		          path: dist/*
		          if-no-files-found: error

+60

.github/workflows/test.yml

		# Builds the package from source and runs the test suite for every
		# supported OS/Python combination.
		name: Tests

		on: [push, pull_request]

		jobs:
		  test-simple:
		    name: Testing
		    strategy:
		      matrix:
		        # Each Python version is paired with one NumPy version to
		        # test against.
		        # The Linux jobs use `ubuntu-latest`, like the build
		        # workflow: the `ubuntu-18.04` hosted runner image has been
		        # retired, so jobs requesting it are never scheduled.
		        include:
		          - os: ubuntu-latest
		            python: '3.7'
		            numpy: '1.15'
		          - os: ubuntu-latest
		            python: '3.8'
		            numpy: '1.16'
		          - os: ubuntu-latest
		            python: '3.9'
		            numpy: '1.19'
		          - os: macos-latest
		            python: '3.7'
		            numpy: '1.15'
		          - os: macos-latest
		            python: '3.8'
		            numpy: '1.16'
		          - os: macos-latest
		            python: '3.9'
		            numpy: '1.19'
		          - os: windows-latest
		            python: '3.7'
		            numpy: '1.15'
		          - os: windows-latest
		            python: '3.8'
		            numpy: '1.16'
		          - os: windows-latest
		            python: '3.9'
		            numpy: '1.19'

		    runs-on: ${{ matrix.os }}
		    defaults:
		      run:
		        # Login shell, so that the Conda environment set up by
		        # setup-miniconda is activated in every `run` step
		        shell: bash -l {0}

		    steps:
		      - uses: actions/checkout@v2
		      - uses: conda-incubator/setup-miniconda@v2
		        with:
		          activate-environment: fastpdb-test
		          auto-update-conda: true
		          python-version: ${{ matrix.python }}
		      - name: Installing dependencies
		        run: conda install -c conda-forge numpy=$NUMPY_VERSION "biotite>=0.29" maturin pytest
		        env:
		          NUMPY_VERSION: ${{ matrix.numpy }}
		      - name: Building distribution
		        run: maturin build --release -i python -o dist
		      # The tests run against the installed wheel
		      - name: Installing distribution
		        run: pip install ./dist/*.whl
		      - name: Testing code
		        run: pytest --assert=plain

+420

python-src/fastpdb/__init__.py

		# Package metadata: name, author, public API and version string.
		__name__ = "fastpdb"
		__author__ = "Patrick Kunzmann"
		__all__ = ["PDBFile"]
		__version__ = "1.0.0"

		import numpy as np
		import biotite
		import biotite.structure as struc
		import biotite.structure.io.pdb as pdb
		from .fastpdb import PDBFile as RustPDBFile


		class PDBFile(biotite.TextFile):
		    r"""
		    This class represents a PDB file.

		    This class only provides support for reading/writing the pure atom
		    information (``ATOM``, ``HETATM``, ``MODEL`` and ``ENDMDL``
		    records).
		    ``TER`` records cannot be written.

		    Parsing and writing of the records is delegated to the Rust
		    backend object stored in ``self._pdb_file``.

		    See also
		    --------
		    PDBxFile

		    Examples
		    --------
		    Load a ``\\*.pdb`` file, modify the structure and save the new
		    structure into a new file:

		    >>> import os.path
		    >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
		    >>> array_stack = file.get_structure()
		    >>> array_stack_mod = rotate(array_stack, [1,2,3])
		    >>> file = PDBFile()
		    >>> file.set_structure(array_stack_mod)
		    >>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb"))
		    """

		    def __init__(self):
		        super().__init__()
		        # A new file starts with a backend holding no lines
		        self._pdb_file = RustPDBFile([])

		    @classmethod
		    def read(cls, file):
		        """
		        Read a PDB file.

		        The `file` argument is forwarded unchanged to the ``read()``
		        method of the superclass.
		        The lines read this way are then handed to the Rust backend.

		        Returns
		        -------
		        file : PDBFile
		            The parsed file.
		        """
		        file = super().read(file)
		        file._pdb_file = RustPDBFile(file.lines)
		        return file

		    def get_model_count(self):
		        """
		        Get the number of models contained in the PDB file.

		        Returns
		        -------
		        model_count : int
		            The number of models.
		        """
		        return self._pdb_file.get_model_count()

		    def get_coord(self, model=None):
		        """
		        Get only the coordinates of the PDB file.

		        Parameters
		        ----------
		        model : int, optional
		            If this parameter is given, the function will return a
		            2D coordinate array from the atoms corresponding to the
		            given model number (starting at 1).
		            Negative values are used to index models starting from the
		            last model instead of the first model.
		            If this parameter is omitted, a 3D coordinate array
		            containing all models will be returned, even if
		            the structure contains only one model.

		        Returns
		        -------
		        coord : ndarray, shape=(m,n,3) or shape=(n,3), dtype=float
		            The coordinates read from the ``ATOM`` and ``HETATM``
		            records of the file.

		        Notes
		        -----
		        Note that :func:`get_coord()` may output more coordinates than
		        the atom array (stack) from the corresponding
		        :func:`get_structure()` call has.
		        The reason for this is, that :func:`get_structure()` filters
		        altloc IDs, while :func:`get_coord()` does not.

		        Examples
		        --------
		        Read an :class:`AtomArrayStack` from multiple PDB files, where
		        each PDB file contains the same atoms but different positions.
		        This is an efficient approach when a trajectory is spread into
		        multiple PDB files, as done e.g. by the Rosetta modeling
		        software.

		        For the purpose of this example, the PDB files are created from
		        an existing :class:`AtomArrayStack`.

		        >>> import os.path
		        >>> from tempfile import gettempdir
		        >>> file_names = []
		        >>> for i in range(atom_array_stack.stack_depth()):
		        ...     pdb_file = PDBFile()
		        ...     pdb_file.set_structure(atom_array_stack[i])
		        ...     file_name = os.path.join(gettempdir(), f"model_{i+1}.pdb")
		        ...     pdb_file.write(file_name)
		        ...     file_names.append(file_name)
		        >>> print(file_names)
		        ['...model_1.pdb', '...model_2.pdb', ..., '...model_38.pdb']

		        Now the PDB files are used to create an :class:`AtomArrayStack`,
		        where each model represents a different model.

		        Construct a new :class:`AtomArrayStack` with annotations taken
		        from one of the created files used as template and coordinates
		        from all of the PDB files.

		        >>> template_file = PDBFile.read(file_names[0])
		        >>> template = template_file.get_structure()
		        >>> coord = []
		        >>> for i, file_name in enumerate(file_names):
		        ...     pdb_file = PDBFile.read(file_name)
		        ...     coord.append(pdb_file.get_coord(model=1))
		        >>> new_stack = from_template(template, np.array(coord))

		        The newly created :class:`AtomArrayStack` should now be equal to
		        the :class:`AtomArrayStack` the PDB files were created from.

		        >>> print(np.allclose(new_stack.coord, atom_array_stack.coord))
		        True
		        """
		        if model is None:
		            return self._pdb_file.parse_coord_multi_model()
		        return self._pdb_file.parse_coord_single_model(model)

		    def get_structure(self, model=None, altloc="first", extra_fields=None,
		                      include_bonds=False):
		        """
		        Get an :class:`AtomArray` or :class:`AtomArrayStack` from the
		        PDB file.

		        Parameters
		        ----------
		        model : int, optional
		            If this parameter is given, the function will return an
		            :class:`AtomArray` from the atoms corresponding to the given
		            model number (starting at 1).
		            Negative values are used to index models starting from the
		            last model instead of the first model.
		            If this parameter is omitted, an :class:`AtomArrayStack`
		            containing all models will be returned, even if the
		            structure contains only one model.
		        altloc : {'first', 'occupancy', 'all'}
		            This parameter defines how altloc IDs are handled:
		                - ``'first'`` - Use atoms that have the first
		                  altloc ID appearing in a residue.
		                - ``'occupancy'`` - Use atoms that have the altloc ID
		                  with the highest occupancy for a residue.
		                - ``'all'`` - Use all atoms.
		                  Note that this leads to duplicate atoms.
		                  When this option is chosen, the ``altloc_id``
		                  annotation array is added to the returned structure.
		        extra_fields : list of str, optional
		            The strings in the list are optional annotation categories
		            that should be stored in the output array or stack.
		            These are valid values:
		            ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
		            ``'charge'``.
		        include_bonds : bool, optional
		            If set to true, a :class:`BondList` will be created for the
		            resulting :class:`AtomArray` containing the bond information
		            from the file.
		            All bonds have :attr:`BondType.ANY`, since the PDB format
		            does not support bond orders.

		        Returns
		        -------
		        array : AtomArray or AtomArrayStack
		            The return type depends on the `model` parameter.

		        Raises
		        ------
		        ValueError
		            If `extra_fields` contains an unknown field or `altloc` is
		            not one of the valid options.
		        """
		        # Decide which optional columns the backend needs to parse
		        if extra_fields is not None:
		            include_atom_id = "atom_id" in extra_fields
		            include_b_factor = "b_factor" in extra_fields
		            include_occupancy = "occupancy" in extra_fields
		            include_charge = "charge" in extra_fields
		        else:
		            include_atom_id = False
		            include_b_factor = False
		            include_occupancy = False
		            include_charge = False
		        if include_bonds:
		            # Required for mapping the bonded atom IDs to atom indices
		            include_atom_id = True
		        if altloc == "occupancy":
		            # Required for selecting the altloc with highest occupancy
		            include_occupancy = True

		        if model is None:
		            coord = self._pdb_file.parse_coord_multi_model()
		            # For a stack the annotations are taken from the first model
		            annotation_model = 1
		        else:
		            coord = self._pdb_file.parse_coord_single_model(model)
		            annotation_model = model
		        annotations = self._pdb_file.parse_annotations(
		            annotation_model,
		            include_atom_id, include_b_factor,
		            include_occupancy, include_charge
		        )
		        (
		            chain_id, res_id, ins_code, res_name,
		            hetero, atom_name, element, altloc_id,
		            atom_id, b_factor, occupancy, charge
		        ) = annotations
		        # Interpret uint32 arrays as unicode arrays
		        chain_id = np.frombuffer(chain_id, dtype="U4")
		        ins_code = np.frombuffer(ins_code, dtype="U1")
		        res_name = np.frombuffer(res_name, dtype="U3")
		        atom_name = np.frombuffer(atom_name, dtype="U6")
		        element = np.frombuffer(element, dtype="U2")
		        altloc_id = np.frombuffer(altloc_id, dtype="U1")

		        # A 3D coordinate array means (model, atom, xyz) -> stack
		        if coord.ndim == 3:
		            atoms = struc.AtomArrayStack(coord.shape[0], coord.shape[1])
		        else:
		            atoms = struc.AtomArray(coord.shape[0])
		        atoms.coord = coord

		        atoms.chain_id = chain_id
		        atoms.res_id = res_id
		        atoms.ins_code = ins_code
		        atoms.res_name = res_name
		        atoms.hetero = hetero
		        atoms.atom_name = atom_name
		        atoms.element = element

		        for field in (extra_fields if extra_fields is not None else []):
		            if field == "atom_id":
		                # Copy is necessary to avoid double masking in
		                # later altloc ID filtering
		                atoms.set_annotation("atom_id", atom_id.copy())
		            elif field == "charge":
		                atoms.set_annotation("charge", charge)
		            elif field == "occupancy":
		                atoms.set_annotation("occupancy", occupancy)
		            elif field == "b_factor":
		                atoms.set_annotation("b_factor", b_factor)
		            else:
		                raise ValueError(f"Unknown extra field: {field}")

		        box = self._pdb_file.parse_box()
		        if box is None:
		            atoms.box = None
		        else:
		            len_a, len_b, len_c, alpha, beta, gamma = box
		            # The backend reports angles in degrees
		            box = struc.vectors_from_unitcell(
		                len_a, len_b, len_c,
		                np.deg2rad(alpha), np.deg2rad(beta), np.deg2rad(gamma)
		            )
		            if isinstance(atoms, struc.AtomArray):
		                atoms.box = box
		            else:
		                # Every model of the stack gets the same box
		                atoms.box = np.repeat(
		                    box[np.newaxis, ...], atoms.stack_depth(), axis=0
		                )

		        # Filter altloc IDs
		        # 'atom_id' is filtered alongside, as it is used for the
		        # bond parsing below
		        if altloc == "occupancy":
		            altloc_filter = struc.filter_highest_occupancy_altloc(
		                atoms, altloc_id, occupancy
		            )
		            atoms = atoms[..., altloc_filter]
		            atom_id = atom_id[altloc_filter] if atom_id is not None else None
		        elif altloc == "first":
		            altloc_filter = struc.filter_first_altloc(atoms, altloc_id)
		            atoms = atoms[..., altloc_filter]
		            atom_id = atom_id[altloc_filter] if atom_id is not None else None
		        elif altloc == "all":
		            atoms.set_annotation("altloc_id", altloc_id)
		        else:
		            raise ValueError(f"'{altloc}' is not a valid 'altloc' option")

		        if include_bonds:
		            bond_list = struc.BondList(
		                atoms.array_length(), self._pdb_file.parse_bonds(atom_id)
		            )
		            bond_list = bond_list.merge(struc.connect_via_residue_names(
		                atoms,
		                # The information for non-hetero residues and water
		                # are not part of CONECT records
		                (~atoms.hetero) | struc.filter_solvent(atoms)
		            ))
		            # Remove bond order from inter residue bonds for consistency
		            bond_list.remove_bond_order()
		            atoms.bonds = bond_list

		        return atoms

		    def set_structure(self, atoms):
		        """
		        Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
		        file.

		        This makes also use of the optional annotation arrays
		        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
		        If the atom array (stack) contains the annotation ``'atom_id'``,
		        these values will be used for atom numbering instead of
		        continuous numbering.

		        Parameters
		        ----------
		        atoms : AtomArray or AtomArrayStack
		            The array or stack to be saved into this file. If a stack
		            is given, each array in the stack is saved as separate
		            model.

		        Raises
		        ------
		        TypeError
		            If `atoms` is neither an :class:`AtomArray` nor an
		            :class:`AtomArrayStack`.

		        Notes
		        -----
		        If `atoms` has an associated :class:`BondList`, ``CONECT``
		        records are also written for all non-water hetero residues
		        and all inter-residue connections.
		        """
		        # Validate the input before any state is touched, so that an
		        # unsupported object does not leave a half-reset file behind
		        if not isinstance(atoms, (struc.AtomArray, struc.AtomArrayStack)):
		            raise TypeError(
		                f"Expected AtomArray or AtomArrayStack, "
		                f"but got {type(atoms).__name__}"
		            )

		        # Reset lines of text
		        self._pdb_file = RustPDBFile([])

		        # Write 'CRYST1' record
		        if atoms.box is not None:
		            box = atoms.box
		            if box.ndim == 3:
		                # Only the box of the first model is written
		                box = box[0]
		            len_a, len_b, len_c, alpha, beta, gamma = (
		                struc.unitcell_from_vectors(box)
		            )
		            self._pdb_file.write_box(
		                len_a, len_b, len_c,
		                np.rad2deg(alpha), np.rad2deg(beta), np.rad2deg(gamma)
		            )

		        # Write 'ATOM' and 'MODEL' records
		        # Convert Unicode arrays into uint32 arrays for usage in Rust
		        # NOTE(review): the column counts below presume the annotation
		        # arrays have the dtypes U4, U1, U3, U6 and U2 - confirm that
		        # this is always the case
		        chain_id = np.frombuffer(atoms.chain_id, dtype=np.uint32).reshape(-1, 4)
		        ins_code = np.frombuffer(atoms.ins_code, dtype=np.uint32).reshape(-1, 1)
		        res_name = np.frombuffer(atoms.res_name, dtype=np.uint32).reshape(-1, 3)
		        atom_name = np.frombuffer(atoms.atom_name, dtype=np.uint32).reshape(-1, 6)
		        element = np.frombuffer(atoms.element, dtype=np.uint32).reshape(-1, 2)

		        # Optional annotations are passed as None, if absent
		        categories = atoms.get_annotation_categories()
		        atom_id = atoms.atom_id if "atom_id" in categories else None
		        b_factor = atoms.b_factor if "b_factor" in categories else None
		        occupancy = atoms.occupancy if "occupancy" in categories else None
		        charge = atoms.charge if "charge" in categories else None

		        # Convert to correct dtype for Rust function call, if necessary
		        coord = atoms.coord.astype(np.float32, copy=False)
		        res_id = atoms.res_id.astype(np.int64, copy=False)
		        hetero = atoms.hetero.astype(bool, copy=False)
		        if atom_id is not None:
		            atom_id = atom_id.astype(np.int64, copy=False)
		        if b_factor is not None:
		            b_factor = b_factor.astype(np.float64, copy=False)
		        if occupancy is not None:
		            occupancy = occupancy.astype(np.float64, copy=False)
		        if charge is not None:
		            charge = charge.astype(np.int64, copy=False)

		        if isinstance(atoms, struc.AtomArray):
		            self._pdb_file.write_single_model(
		                coord, chain_id, res_id, ins_code,
		                res_name, hetero, atom_name, element,
		                atom_id, b_factor, occupancy, charge
		            )
		        else:
		            self._pdb_file.write_multi_model(
		                coord, chain_id, res_id, ins_code,
		                res_name, hetero, atom_name, element,
		                atom_id, b_factor, occupancy, charge
		            )

		        # Write 'CONECT' records
		        if atoms.bonds is not None:
		            # Only non-water hetero records and connections between
		            # residues are added to the records
		            hetero_indices = np.where(
		                atoms.hetero & ~struc.filter_solvent(atoms)
		            )[0]
		            bond_array = atoms.bonds.as_array()
		            first = bond_array[:, 0]
		            second = bond_array[:, 1]
		            bond_array = bond_array[
		                np.isin(first, hetero_indices) |
		                np.isin(second, hetero_indices) |
		                (atoms.res_id[first] != atoms.res_id[second]) |
		                (atoms.chain_id[first] != atoms.chain_id[second])
		            ]
		            # Bond type is unused since PDB does not support bond orders
		            bonds, _ = struc.BondList(
		                atoms.array_length(), bond_array
		            ).get_all_bonds()
		            if atom_id is None:
		                # Without explicit IDs the atoms are numbered
		                # continuously, starting at 1
		                atom_id = np.arange(
		                    1, atoms.array_length() + 1, dtype=np.int64
		                )
		            self._pdb_file.write_bonds(
		                bonds.astype(np.int32, copy=False), atom_id
		            )

		        # Expose the lines written by the backend as this file's text
		        self.lines = self._pdb_file.lines

+1

-3

benchmark.py

		@@ -23,6 +23,4 @@ import time

		pdb_file_path = rcsb.fetch(PDB_ID, "pdb", tempfile.gettempdir())

		pdb_file_path = rcsb.fetch(PDB_ID, "pdb", ".")
		#pdb_file_path = rcsb.fetch(PDB_ID, "pdb", tempfile.gettempdir())

		fastpdb_runtimes = {}
		@@ -29,0 +27,0 @@ biotite_runtimes = {}

+61

-76

benchmark.svg

		@@ -9,3 +9,3 @@ <?xml version="1.0" encoding="utf-8" standalone="no"?>
		<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
		<dc:date>2021-10-06T16:47:02.199004</dc:date>
		<dc:date>2021-10-07T18:07:40.322632</dc:date>
		<dc:format>image/svg+xml</dc:format>
		@@ -42,6 +42,6 @@ <dc:creator>
		<g id="patch_3">
		<path clip-path="url(#p7e47fec0d4)" d="M 83.987045 256.16
		<path clip-path="url(#p3f95facf3d)" d="M 83.987045 256.16
		L 129.611136 256.16
		L 129.611136 184.500366
		L 83.987045 184.500366
		L 129.611136 176.801224
		L 83.987045 176.801224
		z
		@@ -51,6 +51,6 @@ " style="fill:#0a6efd;stroke:#000000;stroke-linejoin:miter;"/>
		<g id="patch_4">
		<path clip-path="url(#p7e47fec0d4)" d="M 266.483409 256.16
		<path clip-path="url(#p3f95facf3d)" d="M 266.483409 256.16
		L 312.1075 256.16
		L 312.1075 149.988014
		L 266.483409 149.988014
		L 312.1075 128.837933
		L 266.483409 128.837933
		z
		@@ -60,3 +60,3 @@ " style="fill:#0a6efd;stroke:#000000;stroke-linejoin:miter;"/>
		<g id="patch_5">
		<path clip-path="url(#p7e47fec0d4)" d="M 448.979773 256.16
		<path clip-path="url(#p3f95facf3d)" d="M 448.979773 256.16
		L 494.603864 256.16
		@@ -69,6 +69,6 @@ L 494.603864 35.069091
		<g id="patch_6">
		<path clip-path="url(#p7e47fec0d4)" d="M 129.611136 256.16
		<path clip-path="url(#p3f95facf3d)" d="M 129.611136 256.16
		L 175.235227 256.16
		L 175.235227 236.952723
		L 129.611136 236.952723
		L 175.235227 235.507445
		L 129.611136 235.507445
		z
		@@ -78,6 +78,6 @@ " style="fill:#e1301d;stroke:#000000;stroke-linejoin:miter;"/>
		<g id="patch_7">
		<path clip-path="url(#p7e47fec0d4)" d="M 312.1075 256.16
		<path clip-path="url(#p3f95facf3d)" d="M 312.1075 256.16
		L 357.731591 256.16
		L 357.731591 236.952723
		L 312.1075 236.952723
		L 357.731591 235.507445
		L 312.1075 235.507445
		z
		@@ -87,6 +87,6 @@ " style="fill:#e1301d;stroke:#000000;stroke-linejoin:miter;"/>
		<g id="patch_8">
		<path clip-path="url(#p7e47fec0d4)" d="M 494.603864 256.16
		<path clip-path="url(#p3f95facf3d)" d="M 494.603864 256.16
		L 540.227955 256.16
		L 540.227955 236.952723
		L 494.603864 236.952723
		L 540.227955 235.507445
		L 494.603864 235.507445
		z
		@@ -101,6 +101,6 @@ " style="fill:#e1301d;stroke:#000000;stroke-linejoin:miter;"/>
		L 0 3.5
		" id="m1acca59ad8" style="stroke:#000000;stroke-width:0.8;"/>
		" id="m37efa55f1f" style="stroke:#000000;stroke-width:0.8;"/>
		</defs>
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="129.611136" xlink:href="#m1acca59ad8" y="256.16"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="129.611136" xlink:href="#m37efa55f1f" y="256.16"/>
		</g>
		@@ -301,3 +301,3 @@ </g>
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="312.1075" xlink:href="#m1acca59ad8" y="256.16"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="312.1075" xlink:href="#m37efa55f1f" y="256.16"/>
		</g>
		@@ -363,3 +363,3 @@ </g>
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="494.603864" xlink:href="#m1acca59ad8" y="256.16"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="494.603864" xlink:href="#m37efa55f1f" y="256.16"/>
		</g>
		@@ -443,6 +443,6 @@ </g>
		L -3.5 0
		" id="me6d8a45849" style="stroke:#000000;stroke-width:0.8;"/>
		" id="m7fcad94283" style="stroke:#000000;stroke-width:0.8;"/>
		</defs>
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="236.952723"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="235.507445"/>
		</g>
		@@ -452,3 +452,3 @@ </g>
		<!-- 1× -->
		<g transform="translate(36.484375 241.511786)scale(0.12 -0.12)">
		<g transform="translate(36.484375 240.066507)scale(0.12 -0.12)">
		<defs>
		@@ -493,3 +493,3 @@ <path d="M 794 531
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="217.745446"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="214.854889"/>
		</g>
		@@ -499,3 +499,3 @@ </g>
		<!-- 2× -->
		<g transform="translate(36.484375 222.304509)scale(0.12 -0.12)">
		<g transform="translate(36.484375 219.413952)scale(0.12 -0.12)">
		<defs>
		@@ -535,3 +535,3 @@ <path d="M 1228 531
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="198.53817"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="194.202334"/>
		</g>
		@@ -541,3 +541,3 @@ </g>
		<!-- 3× -->
		<g transform="translate(36.484375 203.097232)scale(0.12 -0.12)">
		<g transform="translate(36.484375 198.761396)scale(0.12 -0.12)">
		<defs>
		@@ -585,3 +585,3 @@ <path d="M 2597 2516
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="179.330893"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="173.549778"/>
		</g>
		@@ -591,3 +591,3 @@ </g>
		<!-- 4× -->
		<g transform="translate(36.484375 183.889955)scale(0.12 -0.12)">
		<g transform="translate(36.484375 178.108841)scale(0.12 -0.12)">
		<defs>
		@@ -622,3 +622,3 @@ <path d="M 2419 4116
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="160.123616"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="152.897223"/>
		</g>
		@@ -628,3 +628,3 @@ </g>
		<!-- 5× -->
		<g transform="translate(36.484375 164.682679)scale(0.12 -0.12)">
		<g transform="translate(36.484375 157.456285)scale(0.12 -0.12)">
		<defs>
		@@ -665,3 +665,3 @@ <path d="M 691 4666
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="140.916339"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="132.244667"/>
		</g>
		@@ -671,3 +671,3 @@ </g>
		<!-- 6× -->
		<g transform="translate(36.484375 145.475402)scale(0.12 -0.12)">
		<g transform="translate(36.484375 136.80373)scale(0.12 -0.12)">
		<defs>
		@@ -713,3 +713,3 @@ <path d="M 2113 2584
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="121.709063"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="111.592112"/>
		</g>
		@@ -719,3 +719,3 @@ </g>
		<!-- 7× -->
		<g transform="translate(36.484375 126.268125)scale(0.12 -0.12)">
		<g transform="translate(36.484375 116.151174)scale(0.12 -0.12)">
		<defs>
		@@ -741,3 +741,3 @@ <path d="M 525 4666
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="102.501786"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="90.939556"/>
		</g>
		@@ -747,3 +747,3 @@ </g>
		<!-- 8× -->
		<g transform="translate(36.484375 107.060848)scale(0.12 -0.12)">
		<g transform="translate(36.484375 95.498619)scale(0.12 -0.12)">
		<defs>
		@@ -798,3 +798,3 @@ <path d="M 2034 2216
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="83.294509"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="70.287001"/>
		</g>
		@@ -804,3 +804,3 @@ </g>
		<!-- 9× -->
		<g transform="translate(36.484375 87.853572)scale(0.12 -0.12)">
		<g transform="translate(36.484375 74.846063)scale(0.12 -0.12)">
		<defs>
		@@ -846,3 +846,3 @@ <path d="M 703 97
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="64.087232"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="49.634445"/>
		</g>
		@@ -852,3 +852,3 @@ </g>
		<!-- 10× -->
		<g transform="translate(28.849375 68.646295)scale(0.12 -0.12)">
		<g transform="translate(28.849375 54.193508)scale(0.12 -0.12)">
		<defs>
		@@ -886,3 +886,3 @@ <path d="M 2034 4250
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="44.879956"/>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#m7fcad94283" y="28.98189"/>
		</g>
		@@ -892,3 +892,3 @@ </g>
		<!-- 11× -->
		<g transform="translate(28.849375 49.439018)scale(0.12 -0.12)">
		<g transform="translate(28.849375 33.540952)scale(0.12 -0.12)">
		<use xlink:href="#DejaVuSans-31"/>
		@@ -900,18 +900,3 @@ <use x="63.623047" xlink:href="#DejaVuSans-31"/>
		</g>
		<g id="ytick_12">
		<g id="line2d_15">
		<g>
		<use style="stroke:#000000;stroke-width:0.8;" x="61.175" xlink:href="#me6d8a45849" y="25.672679"/>
		</g>
		</g>
		<g id="text_15">
		<!-- 12× -->
		<g transform="translate(28.849375 30.231741)scale(0.12 -0.12)">
		<use xlink:href="#DejaVuSans-31"/>
		<use x="63.623047" xlink:href="#DejaVuSans-32"/>
		<use x="127.246094" xlink:href="#DejaVuSans-d7"/>
		</g>
		</g>
		</g>
		<g id="text_16">
		<g id="text_15">
		<!-- Speedup -->
		@@ -1030,5 +1015,5 @@ <g transform="translate(22.35375 160.9825)rotate(-90)scale(0.12 -0.12)">
		</g>
		<g id="text_17">
		<!-- 3.7× -->
		<g transform="translate(92.229403 179.004741)scale(0.12 -0.12)">
		<g id="text_16">
		<!-- 3.8× -->
		<g transform="translate(92.229403 171.305599)scale(0.12 -0.12)">
		<defs>
		@@ -1045,22 +1030,22 @@ <path d="M 684 794
		<use x="63.623047" xlink:href="#DejaVuSans-2e"/>
		<use x="95.410156" xlink:href="#DejaVuSans-37"/>
		<use x="95.410156" xlink:href="#DejaVuSans-38"/>
		<use x="159.033203" xlink:href="#DejaVuSans-d7"/>
		</g>
		</g>
		<g id="text_18">
		<!-- 5.5× -->
		<g transform="translate(274.725767 144.492389)scale(0.12 -0.12)">
		<use xlink:href="#DejaVuSans-35"/>
		<g id="text_17">
		<!-- 6.2× -->
		<g transform="translate(274.725767 123.342308)scale(0.12 -0.12)">
		<use xlink:href="#DejaVuSans-36"/>
		<use x="63.623047" xlink:href="#DejaVuSans-2e"/>
		<use x="95.410156" xlink:href="#DejaVuSans-35"/>
		<use x="95.410156" xlink:href="#DejaVuSans-32"/>
		<use x="159.033203" xlink:href="#DejaVuSans-d7"/>
		</g>
		</g>
		<g id="text_19">
		<!-- 11.5× -->
		<g id="text_18">
		<!-- 10.7× -->
		<g transform="translate(453.404631 29.573466)scale(0.12 -0.12)">
		<use xlink:href="#DejaVuSans-31"/>
		<use x="63.623047" xlink:href="#DejaVuSans-31"/>
		<use x="63.623047" xlink:href="#DejaVuSans-30"/>
		<use x="127.246094" xlink:href="#DejaVuSans-2e"/>
		<use x="159.033203" xlink:href="#DejaVuSans-35"/>
		<use x="159.033203" xlink:href="#DejaVuSans-37"/>
		<use x="222.65625" xlink:href="#DejaVuSans-d7"/>
		@@ -1078,3 +1063,3 @@ </g>
		</g>
		<g id="text_20">
		<g id="text_19">
		<!-- fastpdb -->
		@@ -1179,3 +1164,3 @@ <g transform="translate(105.575 32.878125)scale(0.12 -0.12)">
		</g>
		<g id="text_21">
		<g id="text_20">
		<!-- biotite -->
		@@ -1196,3 +1181,3 @@ <g transform="translate(105.575 50.491875)scale(0.12 -0.12)">
		<defs>
		<clipPath id="p7e47fec0d4">
		<clipPath id="p3f95facf3d">
		<rect height="243.2" width="501.865" x="61.175" y="12.96"/>
		@@ -1199,0 +1184,0 @@ </clipPath>

+4

-1

Cargo.toml

		[package]
		name = "fastpdb"
		version = "0.1.0"
		version = "1.0.0"
		edition = "2018"

		[package.metadata.maturin]
		python-source = "python-src"

		[dependencies]
		@@ -7,0 +10,0 @@ numpy = "0.14"

+5

-5

PKG-INFO

		Metadata-Version: 2.1
		Name: fastpdb
		Version: 0.1.0
		Classifier: Development Status :: 1 - Planning
		Version: 1.0.0
		Classifier: Development Status :: 5 - Production/Stable
		Classifier: Intended Audience :: Science/Research
		@@ -50,6 +50,6 @@ Classifier: License :: OSI Approved :: BSD License
		Description-Content-Type: text/x-rst; charset=UTF-8
		Project-URL: homepage, https://github.com/biotite-dev/fastpdb
		Project-URL: repository, https://github.com/biotite-dev/fastpdb
		Project-URL: homepage, https://github.com/biotite-dev/fastpdb

		.. image:: logo.svg
		.. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/logo.svg
		:width: 300
		@@ -100,5 +100,5 @@ :align: center

		.. image:: benchmark.svg
		.. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/benchmark.svg
		:width: 800
		:align: center

+2

-2

pyproject.toml

		[project]
		name = "fastpdb"
		version = "0.1.0"
		version = "1.0.0"
		description = "A high performance drop-in replacement for Biotite's PDBFile."
		@@ -16,3 +16,3 @@ readme = "README.rst"
		classifiers = [
		"Development Status :: 1 - Planning",
		"Development Status :: 5 - Production/Stable",
		"Intended Audience :: Science/Research",
		@@ -19,0 +19,0 @@ "License :: OSI Approved :: BSD License",

+2

-2

README.rst

		@@ -1,2 +0,2 @@
		.. image:: logo.svg
		.. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/logo.svg
		:width: 300
		@@ -47,4 +47,4 @@ :align: center

		.. image:: benchmark.svg
		.. image:: https://raw.githubusercontent.com/biotite-dev/fastpdb/main/benchmark.svg
		:width: 800
		:align: center

+35

-44

src/lib.rs

		@@ -6,2 +6,3 @@ //! Low-level PDB file parsing and writing.
		use std::collections::HashMap;
		use std::cmp::Ordering;
		use ndarray::{Array, Ix1, Ix2, Ix3};
		@@ -52,3 +53,3 @@ use pyo3::prelude::*;
		fn get_model_count(&self) -> usize {
		return self.get_model_start_indices().len()
		self.get_model_start_indices().len()
		}
		@@ -195,3 +196,3 @@
		write_string_to_array(&mut res_name, atom_i, line[17..20].trim());
		hetero[atom_i] = if &line[0..4] == "ATOM" { false } else { true };
		hetero[atom_i] = !(&line[0..4] == "ATOM");
		write_string_to_array(&mut atom_name, atom_i, line[12..16].trim());
		@@ -220,3 +221,3 @@ write_string_to_array(&mut element, atom_i, line[76..78].trim());
		else {
		number = raw_number.to_digit(10).ok_or(
		number = raw_number.to_digit(10).ok_or_else( \|\|
		BadStructureError::new_err(format!(
		@@ -359,11 +360,7 @@ "'{}' cannot be parsed into a number", raw_number
		let c = arr[i];
		if c > 0 {
		format!("{:1}+", c)
		match c.cmp(&0) {
		Ordering::Greater => format!("{:1}+", c),
		Ordering::Less => format!("{:1}-", -c),
		Ordering::Equal => String::from(" ")
		}
		else if c < 0 {
		format!("{:1}-", -c)
		}
		else {
		String::from(" ")
		}
		}),
		@@ -430,11 +427,7 @@ ));
		let c = arr[i];
		if c > 0 {
		format!("{:1}+", c)
		match c.cmp(&0) {
		Ordering::Greater => format!("{:1}+", c),
		Ordering::Less => format!("{:1}-", -c),
		Ordering::Equal => String::from(" ")
		}
		else if c < 0 {
		format!("{:1}-", -c)
		}
		else {
		String::from(" ")
		}
		}),
		@@ -528,16 +521,16 @@ ));
		}
		coord[[atom_i, 0]] = line[30..38].trim().parse().or_else(\|_\|
		Err(BadStructureError::new_err(format!(
		coord[[atom_i, 0]] = line[30..38].trim().parse().map_err(\|_\|
		BadStructureError::new_err(format!(
		"'{}' cannot be parsed into a float", line[30..38].trim()
		)))
		))
		)?;
		coord[[atom_i, 1]] = line[38..46].trim().parse().or_else(\|_\|
		Err(BadStructureError::new_err(format!(
		coord[[atom_i, 1]] = line[38..46].trim().parse().map_err(\|_\|
		BadStructureError::new_err(format!(
		"'{}' cannot be parsed into a float", line[38..46].trim()
		)))
		))
		)?;
		coord[[atom_i, 2]] = line[46..54].trim().parse().or_else(\|_\|
		Err(BadStructureError::new_err(format!(
		coord[[atom_i, 2]] = line[46..54].trim().parse().map_err(\|_\|
		BadStructureError::new_err(format!(
		"'{}' cannot be parsed into a float", line[46..54].trim()
		)))
		))
		)?;
		@@ -581,3 +574,3 @@ }
		// In these cases model starting index is set to 0
		if model_start_i.len() == 0 {
		if model_start_i.is_empty() {
		model_start_i = vec![0]
		@@ -593,13 +586,11 @@ }
		model: isize,
		model_start_i: &Vec<usize>) -> PyResult<(usize, usize)> {
		model_start_i: &[usize]) -> PyResult<(usize, usize)> {
		let model_i: isize;
		if model > 0 {
		model_i = model - 1;
		}
		else if model < 0 {
		model_i = model_start_i.len() as isize + model;
		}
		else {
		return Err(exceptions::PyValueError::new_err("Model index must not be 0"));
		}
		match model.cmp(&0) {
		Ordering::Greater => model_i = model - 1,
		Ordering::Less => model_i = model_start_i.len() as isize + model,
		Ordering::Equal => return Err(exceptions::PyValueError::new_err(
		"Model index must not be 0"
		)),
		};

		@@ -626,4 +617,4 @@ if model_i >= model_start_i.len() as isize \|\| model_i < 0 {
		fn get_model_length(&self,
		model_start_i: &Vec<usize>,
		atom_line_i: &Vec<usize>) -> PyResult<usize> {
		model_start_i: &[usize],
		atom_line_i: &[usize]) -> PyResult<usize> {
		let n_models = model_start_i.len();
		@@ -687,6 +678,6 @@ let mut length: Option<usize> = None;
		fn parse_number<T: FromStr>(string: &str) -> PyResult<T> {
		string.trim().parse().or_else(\|_\|
		Err(BadStructureError::new_err(format!(
		string.trim().parse().map_err(\|_\|
		BadStructureError::new_err(format!(
		"'{}' cannot be parsed into a number", string.trim()
		)))
		))
		)
		@@ -693,0 +684,0 @@ }

-405

fastpdb/__init__.py

		__name__ = "fastpdb"
		__author__ = "Patrick Kunzmann"
		__all__ = ["PDBFile"]
		__version__ = "0.1.0"

		import numpy as np
		import biotite
		import biotite.structure as struc
		import biotite.structure.io.pdb as pdb
		from .fastpdb import PDBFile as RustPDBFile


		class PDBFile(biotite.TextFile):
		r"""
		This class represents a PDB file.

		This class only provides support for reading/writing the pure atom
		information (ATOM, HETATM, MODEL and ENDMDL records). TER
		records cannot be written.

		See also
		--------
		PDBxFile

		Examples
		--------
		Load a `\\*.pdb` file, modify the structure and save the new
		structure into a new file:

		>>> import os.path
		>>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
		>>> array_stack = file.get_structure()
		>>> array_stack_mod = rotate(array_stack, [1,2,3])
		>>> file = PDBFile()
		>>> file.set_structure(array_stack_mod)
		>>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb"))
		"""

		def __init__(self):
		    """
		    Create an empty PDB file.

		    The parent class is initialized first, afterwards a backend
		    ``RustPDBFile`` without any lines is attached to the object.
		    """
		    super().__init__()
		    # A new file has no text yet, so the backend starts from an
		    # empty list of lines
		    empty_lines = []
		    self._pdb_file = RustPDBFile(empty_lines)

		@classmethod
		def read(cls, file):
		    """
		    Read a PDB file.

		    Parameters
		    ----------
		    file
		        The input that is passed on unchanged to the ``read()``
		        method of the parent class.

		    Returns
		    -------
		    pdb_file
		        The object returned by the parent ``read()``, with its
		        backend rebuilt from the lines that were read.
		    """
		    pdb_file = super().read(file)
		    # The backend created on construction is empty, hence it is
		    # replaced by one that knows the lines of the read file
		    pdb_file._pdb_file = RustPDBFile(pdb_file.lines)
		    return pdb_file

		def get_model_count(self):
		    """
		    Get the number of models contained in the PDB file.

		    The counting itself is delegated to the backend object.

		    Returns
		    -------
		    model_count : int
		        The number of models.
		    """
		    model_count = self._pdb_file.get_model_count()
		    return model_count

		def get_coord(self, model=None):
		    """
		    Get only the coordinates of the PDB file.

		    Parameters
		    ----------
		    model : int, optional
		        If this parameter is given, the function will return a
		        2D coordinate array from the atoms corresponding to the
		        given model number (starting at 1).
		        Negative values are used to index models starting from the
		        last model instead of the first model.
		        If this parameter is omitted, a 3D coordinate array
		        containing all models will be returned, even if
		        the structure contains only one model.

		    Returns
		    -------
		    coord : ndarray, shape=(m,n,3) or shape=(n,3), dtype=float
		        The coordinates read from the ATOM and HETATM records of the
		        file.

		    Notes
		    -----
		    Note that :func:`get_coord()` may output more coordinates than
		    the atom array (stack) from the corresponding
		    :func:`get_structure()` call has.
		    The reason for this is, that :func:`get_structure()` filters
		    altloc IDs, while :func:`get_coord()` does not.

		    Examples
		    --------
		    Read an :class:`AtomArrayStack` from multiple PDB files, where
		    each PDB file contains the same atoms but different positions.
		    This is an efficient approach when a trajectory is spread into
		    multiple PDB files, as done e.g. by the Rosetta modeling
		    software.

		    For the purpose of this example, the PDB files are created from
		    an existing :class:`AtomArrayStack`.

		    >>> import os.path
		    >>> from tempfile import gettempdir
		    >>> file_names = []
		    >>> for i in range(atom_array_stack.stack_depth()):
		    ...     pdb_file = PDBFile()
		    ...     pdb_file.set_structure(atom_array_stack[i])
		    ...     file_name = os.path.join(gettempdir(), f"model_{i+1}.pdb")
		    ...     pdb_file.write(file_name)
		    ...     file_names.append(file_name)
		    >>> print(file_names)
		    ['...model_1.pdb', '...model_2.pdb', ..., '...model_38.pdb']

		    Now the PDB files are used to create an :class:`AtomArrayStack`,
		    where each model originates from a different file.

		    Construct a new :class:`AtomArrayStack` with annotations taken
		    from one of the created files used as template and coordinates
		    from all of the PDB files.

		    >>> template_file = PDBFile.read(file_names[0])
		    >>> template = template_file.get_structure()
		    >>> coord = []
		    >>> for i, file_name in enumerate(file_names):
		    ...     pdb_file = PDBFile.read(file_name)
		    ...     coord.append(pdb_file.get_coord(model=1))
		    >>> new_stack = from_template(template, np.array(coord))

		    The newly created :class:`AtomArrayStack` should now be equal to
		    the :class:`AtomArrayStack` the PDB files were created from.

		    >>> print(np.allclose(new_stack.coord, atom_array_stack.coord))
		    True
		    """
		    # Without a model number the coordinates of all models are parsed
		    if model is None:
		        return self._pdb_file.parse_coord_multi_model()
		    return self._pdb_file.parse_coord_single_model(model)

		def get_structure(self, model=None, altloc="first", extra_fields=None, include_bonds=False):
		    """
		    Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file.

		    Parameters
		    ----------
		    model : int, optional
		        If this parameter is given, the function will return an
		        :class:`AtomArray` from the atoms corresponding to the given
		        model number (starting at 1).
		        Negative values are used to index models starting from the
		        last model instead of the first model.
		        If this parameter is omitted, an :class:`AtomArrayStack`
		        containing all models will be returned, even if the
		        structure contains only one model.
		    altloc : {'first', 'occupancy', 'all'}
		        This parameter defines how altloc IDs are handled:
		            - ``'first'`` - Use atoms that have the first
		              altloc ID appearing in a residue.
		            - ``'occupancy'`` - Use atoms that have the altloc ID
		              with the highest occupancy for a residue.
		            - ``'all'`` - Use all atoms.
		              Note that this leads to duplicate atoms.
		              When this option is chosen, the ``altloc_id``
		              annotation array is added to the returned structure.
		    extra_fields : list of str, optional
		        The strings in the list are optional annotation categories
		        that should be stored in the output array or stack.
		        These are valid values:
		        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
		        ``'charge'``.
		    include_bonds : bool, optional
		        If set to true, a :class:`BondList` will be created for the
		        resulting :class:`AtomArray` containing the bond information
		        from the file.
		        All bonds have :attr:`BondType.ANY`, since the PDB format
		        does not support bond orders.

		    Returns
		    -------
		    array : AtomArray or AtomArrayStack
		        The return type depends on the `model` parameter.

		    Raises
		    ------
		    ValueError
		        If `extra_fields` contains an unknown field or `altloc` is
		        not one of the supported options.
		    """
		    requested_fields = [] if extra_fields is None else extra_fields

		    # Decide which optional columns the backend needs to parse
		    include_atom_id = "atom_id" in requested_fields
		    include_b_factor = "b_factor" in requested_fields
		    include_occupancy = "occupancy" in requested_fields
		    include_charge = "charge" in requested_fields
		    if include_bonds:
		        # Required for mapping the bonded atom IDs to atom indices
		        include_atom_id = True
		    if altloc == "occupancy":
		        # The occupancy filter below needs the occupancy values
		        include_occupancy = True

		    # For a stack the annotations are taken from the first model
		    if model is None:
		        coord = self._pdb_file.parse_coord_multi_model()
		        annotation_model = 1
		    else:
		        coord = self._pdb_file.parse_coord_single_model(model)
		        annotation_model = model
		    (
		        chain_id, res_id, ins_code, res_name,
		        hetero, atom_name, element, altloc_id,
		        atom_id, b_factor, occupancy, charge
		    ) = self._pdb_file.parse_annotations(
		        annotation_model,
		        include_atom_id, include_b_factor,
		        include_occupancy, include_charge
		    )
		    # Interpret uint32 arrays as unicode arrays
		    chain_id = np.frombuffer(chain_id, dtype="U4")
		    ins_code = np.frombuffer(ins_code, dtype="U1")
		    res_name = np.frombuffer(res_name, dtype="U3")
		    atom_name = np.frombuffer(atom_name, dtype="U6")
		    element = np.frombuffer(element, dtype="U2")
		    altloc_id = np.frombuffer(altloc_id, dtype="U1")

		    # Three dimensions mean (model, atom, xyz), i.e. a stack
		    if coord.ndim == 3:
		        atoms = struc.AtomArrayStack(coord.shape[0], coord.shape[1])
		    else:
		        atoms = struc.AtomArray(coord.shape[0])
		    atoms.coord = coord

		    mandatory_annotations = (
		        ("chain_id", chain_id),
		        ("res_id", res_id),
		        ("ins_code", ins_code),
		        ("res_name", res_name),
		        ("hetero", hetero),
		        ("atom_name", atom_name),
		        ("element", element),
		    )
		    for category, values in mandatory_annotations:
		        setattr(atoms, category, values)

		    for field in requested_fields:
		        if field == "atom_id":
		            # Copy is necessary to avoid double masking in
		            # later altloc ID filtering
		            atoms.set_annotation("atom_id", atom_id.copy())
		        elif field == "charge":
		            atoms.set_annotation("charge", charge)
		        elif field == "occupancy":
		            atoms.set_annotation("occupancy", occupancy)
		        elif field == "b_factor":
		            atoms.set_annotation("b_factor", b_factor)
		        else:
		            raise ValueError(f"Unknown extra field: {field}")

		    unit_cell = self._pdb_file.parse_box()
		    if unit_cell is None:
		        atoms.box = None
		    else:
		        len_a, len_b, len_c, alpha, beta, gamma = unit_cell
		        # The file gives angles in degrees, the conversion needs radians
		        box_vectors = struc.vectors_from_unitcell(
		            len_a, len_b, len_c,
		            np.deg2rad(alpha), np.deg2rad(beta), np.deg2rad(gamma)
		        )
		        if isinstance(atoms, struc.AtomArray):
		            atoms.box = box_vectors
		        else:
		            # Each model of a stack gets the same box
		            atoms.box = np.repeat(
		                box_vectors[np.newaxis, ...], atoms.stack_depth(), axis=0
		            )

		    # Filter altloc IDs
		    if altloc == "occupancy":
		        altloc_mask = struc.filter_highest_occupancy_altloc(
		            atoms, altloc_id, occupancy
		        )
		    elif altloc == "first":
		        altloc_mask = struc.filter_first_altloc(atoms, altloc_id)
		    elif altloc == "all":
		        altloc_mask = None
		        atoms.set_annotation("altloc_id", altloc_id)
		    else:
		        raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
		    if altloc_mask is not None:
		        atoms = atoms[..., altloc_mask]
		        # Keep the atom IDs in sync with the filtered atoms
		        if atom_id is not None:
		            atom_id = atom_id[altloc_mask]

		    if include_bonds:
		        conect_bonds = struc.BondList(
		            atoms.array_length(), self._pdb_file.parse_bonds(atom_id)
		        )
		        # The information for non-hetero residues and water
		        # are not part of CONECT records
		        residue_bond_mask = (~atoms.hetero) | struc.filter_solvent(atoms)
		        bond_list = conect_bonds.merge(
		            struc.connect_via_residue_names(atoms, residue_bond_mask)
		        )
		        # Remove bond order from inter residue bonds for consistency
		        bond_list.remove_bond_order()
		        atoms.bonds = bond_list

		    return atoms


		def set_structure(self, atoms):
		    """
		    Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
		    file.

		    This makes also use of the optional annotation arrays
		    ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
		    If the atom array (stack) contains the annotation ``'atom_id'``,
		    these values will be used for atom numbering instead of
		    continuous numbering.

		    Parameters
		    ----------
		    atoms : AtomArray or AtomArrayStack
		        The array or stack to be saved into this file. If a stack
		        is given, each array in the stack is saved as separate
		        model.

		    Raises
		    ------
		    TypeError
		        If `atoms` is neither an :class:`AtomArray` nor an
		        :class:`AtomArrayStack`.

		    Notes
		    -----
		    If `atoms` has an associated :class:`BondList`, ``CONECT``
		    records are also written for all non-water hetero residues
		    and all inter-residue connections.

		    The string annotations are brought to the fixed widths used for
		    the conversion (``chain_id``: 4, ``ins_code``: 1,
		    ``res_name``: 3, ``atom_name``: 6, ``element``: 2) before they
		    are handed over, so longer strings are truncated.
		    """
		    def as_code_points(annotation, width):
		        # Reinterpreting the raw buffer is only correct for a
		        # C-contiguous array whose items are exactly 'width'
		        # characters wide: a strided view would be rejected and
		        # another width would shift characters between atoms.
		        # Arrays that already fulfill this are passed through
		        # without a copy.
		        fixed_width = np.ascontiguousarray(annotation, dtype=f"U{width}")
		        return np.frombuffer(
		            fixed_width, dtype=np.uint32
		        ).reshape(-1, width)

		    # Reset lines of text
		    self._pdb_file = RustPDBFile([])

		    # Write 'CRYST1' record
		    if atoms.box is not None:
		        box = atoms.box
		        if box.ndim == 3:
		            # Only the box of the first model is written
		            box = box[0]
		        len_a, len_b, len_c, alpha, beta, gamma \
		            = struc.unitcell_from_vectors(box)
		        self._pdb_file.write_box(
		            len_a, len_b, len_c,
		            np.rad2deg(alpha), np.rad2deg(beta), np.rad2deg(gamma)
		        )

		    # Write 'ATOM' and 'MODEL' records
		    # Convert Unicode arrays into uint32 arrays for usage in Rust
		    chain_id = as_code_points(atoms.chain_id, 4)
		    ins_code = as_code_points(atoms.ins_code, 1)
		    res_name = as_code_points(atoms.res_name, 3)
		    atom_name = as_code_points(atoms.atom_name, 6)
		    element = as_code_points(atoms.element, 2)

		    # Optional annotations are passed as None if they are missing
		    categories = atoms.get_annotation_categories()
		    atom_id = atoms.atom_id if "atom_id" in categories else None
		    b_factor = atoms.b_factor if "b_factor" in categories else None
		    occupancy = atoms.occupancy if "occupancy" in categories else None
		    charge = atoms.charge if "charge" in categories else None

		    if isinstance(atoms, struc.AtomArray):
		        self._pdb_file.write_single_model(
		            atoms.coord, chain_id, atoms.res_id, ins_code,
		            res_name, atoms.hetero, atom_name, element,
		            atom_id, b_factor, occupancy, charge
		        )
		    elif isinstance(atoms, struc.AtomArrayStack):
		        self._pdb_file.write_multi_model(
		            atoms.coord, chain_id, atoms.res_id, ins_code,
		            res_name, atoms.hetero, atom_name, element,
		            atom_id, b_factor, occupancy, charge
		        )
		    else:
		        raise TypeError(
		            f"Expected AtomArray or AtomArrayStack, "
		            f"but got {type(atoms).__name__}"
		        )

		    # Write 'CONECT' records
		    if atoms.bonds is not None:
		        # Only non-water hetero records and connections between
		        # residues are added to the records
		        hetero_indices = np.where(
		            atoms.hetero & ~struc.filter_solvent(atoms)
		        )[0]
		        bond_array = atoms.bonds.as_array()
		        first_atom = bond_array[:, 0]
		        second_atom = bond_array[:, 1]
		        bond_array = bond_array[
		            np.isin(first_atom, hetero_indices) |
		            np.isin(second_atom, hetero_indices) |
		            (atoms.res_id[first_atom] != atoms.res_id[second_atom]) |
		            (atoms.chain_id[first_atom] != atoms.chain_id[second_atom])
		        ]
		        # Bond type is unused since PDB does not support bond orders
		        bonds, _ = struc.BondList(
		            atoms.array_length(), bond_array
		        ).get_all_bonds()
		        if atom_id is None:
		            # Without explicit atom IDs the atoms are numbered
		            # continuously, starting at 1
		            atom_id = np.arange(1, atoms.array_length()+1)
		        self._pdb_file.write_bonds(
		            bonds, atom_id
		        )

		    self.lines = self._pdb_file.lines

-8

test.py

		import fastpdb

		# Read the first model of the input structure ...
		source_file = fastpdb.PDBFile.read("1AKI.pdb")
		first_model = source_file.get_structure(model=1)

		# ... and write it unchanged into a new PDB file
		target_file = fastpdb.PDBFile()
		target_file.set_structure(first_model)
		target_file.write("test.pdb")

fastpdb - PyPI Package Compare versions

Improved metrics