clease
Advanced tools
| Metadata-Version: 2.1 | ||
| Name: clease | ||
| Version: 1.0.2 | ||
| Version: 1.0.3 | ||
| Summary: CLuster Expansion in Atomistic Simulation Environment | ||
@@ -5,0 +5,0 @@ Home-page: https://gitlab.com/computationalmaterials/clease/ |
@@ -16,21 +16,21 @@ ase>=3.22 | ||
| [all] | ||
| sphinx | ||
| pytest-benchmark[histogram]>=3.4.1 | ||
| pre-commit | ||
| black>=22.1.0 | ||
| pip | ||
| pytest-cov | ||
| ipython | ||
| black>=22.1.0 | ||
| mock | ||
| twine | ||
| pytest | ||
| cython | ||
| clang-format>=14.0.3 | ||
| pytest-mock | ||
| tox>=3.24.0 | ||
| build | ||
| pytest | ||
| sphinx_rtd_theme | ||
| pyclean>=2.0.0 | ||
| pytest-benchmark[histogram]>=3.4.1 | ||
| pre-commit | ||
| sphinx | ||
| pylint | ||
| sphinx_rtd_theme | ||
| clease-gui | ||
| mock | ||
| ipython | ||
| twine | ||
| clang-format>=14.0.3 | ||
| build | ||
| tox>=3.24.0 | ||
| pytest-mock | ||
@@ -37,0 +37,0 @@ [dev] |
@@ -1,1 +0,1 @@ | ||
| 1.0.2 | ||
| 1.0.3 |
+81
-27
@@ -8,3 +8,4 @@ """Module that fits ECIs to energy data.""" | ||
| import multiprocessing as mp | ||
| from typing import Dict, List | ||
| from typing import Dict, List, Sequence, Optional | ||
| from collections import defaultdict, Counter | ||
@@ -22,3 +23,8 @@ from deprecated import deprecated | ||
| from clease.cluster_coverage import ClusterCoverageChecker | ||
| from clease.tools import add_file_extension, sort_cf_names, get_size_from_cf_name | ||
| from clease.tools import ( | ||
| add_file_extension, | ||
| sort_cf_names, | ||
| get_size_from_cf_name, | ||
| get_diameter_from_cf_name, | ||
| ) | ||
@@ -101,2 +107,3 @@ __all__ = ("Evaluate", "supports_alpha_cv") | ||
| num_repetitions=1, | ||
| normalization_symbols: Optional[Sequence[str]] = None, | ||
| ): | ||
@@ -170,2 +177,31 @@ """Initialize the Evaluate class.""" | ||
| self.set_normalization(normalization_symbols) | ||
| def set_normalization(self, normalization_symbols: Optional[Sequence[str]] = None) -> None: | ||
| """Set the energy normalization factor, e.g. to normalize the final energy reports | ||
| in energy per metal atom, rather than energy per atom (i.e. every atom). | ||
| :param normalization_symbols: A list of symbols which should be included in the counting. | ||
| If this is None, then the default of normalizing to energy per every atom is maintained. | ||
| """ | ||
| self.normalization = np.ones(len(self.e_dft), dtype=float) | ||
| if normalization_symbols is None: | ||
| return | ||
| # We need to figure out the ratio between the total number of atoms, and the number of | ||
| # symbols we normalize to. | ||
| # Energies are assumed to be in energy per atom, i.e. normalized by the total number of | ||
| # atoms in the initial cell, including vacancies. | ||
| con = self.settings.connect() | ||
| for ii, uid in enumerate(self.row_ids): | ||
| row = con.get(id=uid) | ||
| # Count the occurrences of each symbol | ||
| count = Counter(row.symbols) | ||
| natoms = row.natoms | ||
| new_total = sum(count.get(s, 0) for s in normalization_symbols) | ||
| if new_total > 0: | ||
| # If none of the requested species were found we do not adjust the normalization. | ||
| self.normalization[ii] = natoms / new_total | ||
| @property | ||
@@ -879,8 +915,8 @@ def scoring_scheme(self) -> str: | ||
| self.fit() | ||
| distances = self._distance_from_names() | ||
| # Structure the ECIs by size | ||
| eci_by_size = {} | ||
| for name, d, eci in zip(self.cf_names, distances, self.eci): | ||
| for name, eci in zip(self.cf_names, self.eci): | ||
| size = get_size_from_cf_name(name) | ||
| d = get_diameter_from_cf_name(name) | ||
| if size not in eci_by_size.keys(): | ||
@@ -930,12 +966,6 @@ eci_by_size[size] = {"d": [], "eci": [], "name": []} | ||
| def _distance_from_names(self): | ||
| """Get a list with all the distances for each name.""" | ||
| dists = [] | ||
| for name in self.cf_names: | ||
| if name == "c0" or name.startswith("c1"): | ||
| dists.append(0) | ||
| continue | ||
| dist_str = name.split("_")[1][1:] | ||
| dists.append(int(dist_str)) | ||
| return dists | ||
| def _get_cf_name_radius(self, cf_name: str) -> float: | ||
| """Get the cluster radius of a cf_name""" | ||
| cluster = self.settings.get_cluster_corresponding_to_cf_name(cf_name) | ||
| return cluster.diameter / 2 | ||
@@ -948,2 +978,4 @@ def mae(self): | ||
| delta_e = self.e_dft - e_pred | ||
| delta_e *= self.normalization | ||
| w = np.diag(self.weight_matrix) | ||
@@ -959,2 +991,3 @@ delta_e *= w | ||
| delta_e = self.e_dft - e_pred | ||
| delta_e *= self.normalization | ||
@@ -982,2 +1015,4 @@ w = np.diag(self.weight_matrix) | ||
| delta_e = self.e_dft - e_pred | ||
| delta_e *= self.normalization | ||
| cfm = self.cf_matrix | ||
@@ -988,2 +1023,6 @@ # precision matrix | ||
| self.e_pred_loo = self.e_dft - delta_e_loo | ||
| # Apply energy normalization | ||
| self.e_pred_loo *= self.normalization | ||
| w = np.diag(self.weight_matrix) | ||
@@ -1003,2 +1042,4 @@ cv_sq = np.sum(w * delta_e_loo**2) | ||
| delta_e = self.e_dft[i] - e_pred | ||
| delta_e *= self.normalization[i] | ||
| cv_sq += self.weight_matrix[i, i] * (delta_e) ** 2 | ||
@@ -1022,3 +1063,6 @@ e_pred_loo.append(e_pred) | ||
| e_pred = part["validate_X"].dot(eci) | ||
| scores.append(np.mean((e_pred - part["validate_y"]) ** 2)) | ||
| delta_e = e_pred - part["validate_y"] | ||
| delta_e *= self.normalization[part["validate_index"]] | ||
| scores.append(np.mean(delta_e**2)) | ||
| avg_score += np.sqrt(np.mean(scores)) | ||
@@ -1156,3 +1200,3 @@ return avg_score / self.num_repetitions | ||
| def get_energy_predict(self) -> np.ndarray: | ||
| def get_energy_predict(self, normalize: bool = True) -> np.ndarray: | ||
| """ | ||
@@ -1164,4 +1208,12 @@ Perform matrix multiplication of eci and cf_matrix | ||
| eci = self.get_eci() | ||
| return self.cf_matrix.dot(eci) | ||
| en = self.cf_matrix.dot(eci) | ||
| if normalize: | ||
| return en * self.normalization | ||
| return en | ||
| def get_energy_true(self, normalize: bool = True) -> np.ndarray: | ||
| if normalize: | ||
| return self.e_dft * self.normalization | ||
| return self.e_dft | ||
| def get_eci_by_size(self) -> Dict[str, Dict[str, list]]: | ||
@@ -1171,27 +1223,29 @@ """ | ||
| :return: Dictionary contains | ||
| :return: Dictionary which contains | ||
| * first index | ||
| - body size of cluster | ||
| * Key: body size of cluster | ||
| * Value: A dictionary with the following entries: | ||
| * second index | ||
| - "distance" : distance of the cluster | ||
| - "eci" : eci of the cluster | ||
| - "name" : name of the cluster | ||
| - "radius" : Radius of the cluster in Ångstrom. | ||
| """ | ||
| if self.eci is None: | ||
| raise ValueError("ECIs have not been fitted yet.") | ||
| distances = self._distance_from_names() | ||
| # Structure the ECIs by size | ||
| eci_by_size = {} | ||
| for name, distance, eci in zip(self.cf_names, distances, self.eci): | ||
| eci_by_size = defaultdict(lambda: defaultdict(list)) | ||
| for name, eci in zip(self.cf_names, self.eci): | ||
| size = get_size_from_cf_name(name) | ||
| if size not in eci_by_size.keys(): | ||
| eci_by_size[size] = {"distance": [], "eci": [], "name": []} | ||
| distance = get_diameter_from_cf_name(name) | ||
| radius = self._get_cf_name_radius(name) | ||
| eci_by_size[size]["distance"].append(distance) | ||
| eci_by_size[size]["eci"].append(eci) | ||
| eci_by_size[size]["name"].append(name) | ||
| eci_by_size[size]["radius"].append(radius) | ||
| return eci_by_size | ||
| # Remove the defaultdict factory attributes | ||
| return {k: dict(v) for k, v in eci_by_size.items()} | ||
@@ -1198,0 +1252,0 @@ def print_coverage_report(self, file=sys.stdout) -> None: |
| """Monte Carlo method for ase.""" | ||
| from typing import Dict, Union, Iterator | ||
| from typing import Dict, Union, Iterator, Any | ||
| import sys | ||
@@ -274,3 +274,3 @@ import datetime | ||
| def get_thermodynamic_quantities(self): | ||
| def get_thermodynamic_quantities(self) -> Dict[str, Any]: | ||
| """Compute thermodynamic quantities.""" | ||
@@ -295,6 +295,12 @@ quantities = {} | ||
| # Add information from observers | ||
| for obs in self.observers: | ||
| quantities.update(obs[1].get_averages()) | ||
| quantities.update(self._get_obs_averages()) | ||
| return quantities | ||
| def _get_obs_averages(self) -> Dict[str, Any]: | ||
| """Get average measurements from observers""" | ||
| obs_avgs = {} | ||
| for obs in self.observers: | ||
| obs_avgs.update(obs[1].get_averages()) | ||
| return obs_avgs | ||
| def _calculate_step(self, system_changes: SystemChanges): | ||
@@ -301,0 +307,0 @@ """Calculate energies given a step, and decide if we accept the step. |
@@ -213,2 +213,3 @@ from typing import Sequence, Dict | ||
| quantities["n_mc_steps"] = self.averager.counter | ||
| quantities["accept_rate"] = self.current_accept_rate | ||
@@ -237,4 +238,7 @@ # Add singlets and chemical potential to the dictionary | ||
| # Add information from observers | ||
| quantities.update(self._get_obs_averages()) | ||
| if reset_eci: | ||
| self._reset_eci_to_original(self.atoms.calc.eci) | ||
| return quantities |
@@ -1,5 +0,7 @@ | ||
| from typing import Sequence, List, Set, Tuple | ||
| from typing import Sequence, List, Set, Tuple, Dict | ||
| import random | ||
| from random import choice | ||
| from abc import abstractmethod, ABC | ||
| from ase import Atoms | ||
| from ase.data import chemical_symbols | ||
| from clease.datastructures import SystemChange | ||
@@ -19,3 +21,3 @@ from clease.tools import flatten | ||
| DEFAULT_MAX_ATTEMPTS = 10000 | ||
| DEFAULT_MAX_ATTEMPTS = 10_000 | ||
@@ -159,6 +161,21 @@ | ||
| # Pre-compute the possible flips. We don't want to be computing | ||
| # these maps for each trial move. | ||
| self.flip_map = self._make_possible_flips() | ||
| def _make_possible_flips(self) -> Dict[str, List[str]]: | ||
| """Compute a map of possible flips, given a site has a particular symbol.""" | ||
| possible = {} | ||
| for sym in self.symbols: | ||
| possible[sym] = [s for s in self.symbols if s != sym] | ||
| return possible | ||
| def get_single_trial_move(self) -> List[SystemChange]: | ||
| pos = random.choice(self.indices) | ||
| old_symb = self.atoms[pos].symbol | ||
| new_symb = random.choice([s for s in self.symbols if s != old_symb]) | ||
| """Get a random flip of an included site into a different element.""" | ||
| pos = choice(self.indices) | ||
| # Access to the numbers array of the atoms object is the | ||
| # fastest way of determining a single symbol, by avoiding constructing the entire Symbols | ||
| # array in atoms.symbols | ||
| old_symb = chemical_symbols[self.atoms.numbers[pos]] | ||
| new_symb = choice(self.flip_map[old_symb]) | ||
| return [ | ||
@@ -165,0 +182,0 @@ SystemChange(index=pos, old_symb=old_symb, new_symb=new_symb, name=self.CHANGE_NAME) |
@@ -22,3 +22,3 @@ from typing import List, Tuple | ||
| :param interactive: Add interactive elements to the plot? | ||
| :param interactive: Add interactive elements to the plot. | ||
@@ -29,3 +29,3 @@ :return: Figure instance of plot | ||
| plot_args = {} | ||
| X = evaluate.e_dft | ||
| X = evaluate.get_energy_true() | ||
| Y = evaluate.get_energy_predict() | ||
@@ -107,2 +107,3 @@ xlabel = plot_args.get("xlabel", r"E$_{DFT}$ (eV/atom)") | ||
| - "title": title of plot | ||
| :param interactive: Add interactive elements to the plot. | ||
@@ -113,3 +114,3 @@ :return: Figure instance of plot | ||
| plot_args = {} | ||
| X = evaluate.e_dft | ||
| X = evaluate.get_energy_true() | ||
| Y = evaluate.get_energy_predict() - X # eV/atom | ||
@@ -159,2 +160,3 @@ Y *= 1000 # meV/atom | ||
| ignore_sizes=(), | ||
| interactive: bool = False, | ||
| ) -> Figure: | ||
@@ -178,2 +180,3 @@ """ | ||
| Default is to not ignore any clusters. | ||
| :param interactive: Add interactive elements to the plot. | ||
@@ -193,2 +196,3 @@ :return: Figure instance of plot | ||
| lines = [] | ||
| annotations = [] | ||
@@ -212,4 +216,30 @@ fig = plt.figure() | ||
| line = ax.plot(X, Y, label=f"{size}-body", marker=mrk, mfc="none", ls="", markersize=8) | ||
| lines.append(line[0]) | ||
| # Make annotations for interactive plots, since we have all the data we need prepared here. | ||
| if interactive: | ||
| lines.append(line[0]) | ||
| annot = [ | ||
| ( | ||
| f"Size: {size}\nDiameter: {dist:d}\nName: {name}\n" | ||
| f"Radius: {radius:.3f} Å\nECI: {eci:.4f} eV/atom" | ||
| ) | ||
| for dist, name, eci, radius in zip(X, data["name"], Y, data["radius"]) | ||
| ] | ||
| annotations.append(annot) | ||
| ax.legend() | ||
| if interactive: | ||
| # pylint: disable=import-outside-toplevel | ||
| from clease.interactive_plot import InteractivePlot, AnnotatedAx | ||
| # Construct the annotated axis objects. | ||
| annotated_ax = AnnotatedAx( | ||
| ax, | ||
| lines, | ||
| annotations, | ||
| ) | ||
| # Attach interactivity to the fig object. | ||
| InteractivePlot(fig, annotated_ax) | ||
| return fig | ||
@@ -342,2 +372,3 @@ | ||
| e_pred = evaluate.get_energy_predict() | ||
| e_dft = evaluate.get_energy_true() | ||
@@ -347,4 +378,4 @@ def format_annotation_dft(idx): | ||
| row_id = evaluate.row_ids[idx] | ||
| e_dft = evaluate.e_dft[idx] | ||
| return f"DB ID: {row_id}\nName: {name}\nE(DFT): {e_dft:.4f} eV/atom" | ||
| en = e_dft[idx] | ||
| return f"DB ID: {row_id}\nName: {name}\nE(DFT): {en:.4f} eV/atom" | ||
@@ -366,2 +397,3 @@ def format_annotation_ce(idx): | ||
| e_pred = evaluate.get_energy_predict() | ||
| e_dft = evaluate.get_energy_true() | ||
@@ -371,9 +403,7 @@ def format_annotation(idx): | ||
| row_id = evaluate.row_ids[idx] | ||
| e_dft = evaluate.e_dft[idx] | ||
| en = e_dft[idx] | ||
| e_ce = e_pred[idx] | ||
| return ( | ||
| f"DB ID: {row_id}\nName: {name}\nE(DFT): {e_dft:.4f} eV/atom\nE(CE): {e_ce:.4f} eV/atom" | ||
| ) | ||
| return f"DB ID: {row_id}\nName: {name}\nE(DFT): {en:.4f} eV/atom\nE(CE): {e_ce:.4f} eV/atom" | ||
| N = len(e_pred) | ||
| return ([format_annotation(idx) for idx in range(N)],) |
@@ -10,3 +10,3 @@ from typing import List, Sequence, Dict, Union, Callable, Tuple | ||
| from clease.data_normalizer import DataNormalizer | ||
| from clease.tools import split_dataset, get_size_from_cf_name | ||
| from clease.tools import get_diameter_from_cf_name, split_dataset, get_size_from_cf_name | ||
| from .regression import LinearRegression | ||
@@ -159,12 +159,3 @@ from .constrained_ridge import ConstrainedRidge | ||
| """ | ||
| diameters = [] | ||
| for n in names: | ||
| size = get_size_from_cf_name(n) | ||
| if size in {0, 1}: | ||
| diameters.append(0.0) | ||
| else: | ||
| dia_str = n.split("_")[1] | ||
| dia = int(dia_str[1:]) | ||
| diameters.append(dia) | ||
| self.diameters = diameters | ||
| self.diameters = [get_diameter_from_cf_name(n) for n in names] | ||
@@ -171,0 +162,0 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray: |
@@ -18,4 +18,4 @@ """Definition of ClusterExpansionSettings Class. | ||
| from clease.jsonio import jsonable | ||
| from clease.tools import wrap_and_sort_by_position | ||
| from clease.cluster import ClusterManager, ClusterList | ||
| from clease.tools import get_size_from_cf_name, wrap_and_sort_by_position | ||
| from clease.cluster import ClusterManager, ClusterList, Cluster | ||
| from clease.basis_function import Polynomial, Trigonometric, BinaryLinear, BasisFunction | ||
@@ -707,2 +707,4 @@ from clease.datastructures import TransMatrix | ||
| """String with information about the clusters""" | ||
| # We need the clusters to be constructed first. | ||
| self.ensure_clusters_exist() | ||
| mult_dict = self.multiplicity_factor | ||
@@ -737,2 +739,3 @@ | ||
| # Bottom horizontal line | ||
| lines.append(rule) | ||
@@ -742,3 +745,26 @@ | ||
| def get_cluster_corresponding_to_cf_name(self, cf_name: str) -> Cluster: | ||
| """Find the Cluster object which corresponds to a CF name. | ||
| The cluster will not be specialized to the decoration number, if one exists | ||
| in the cf name. | ||
| Example: | ||
| >>> from clease.settings import CEBulk, Concentration | ||
| >>> conc = Concentration([['Au', 'Cu']]) | ||
| >>> settings = CEBulk(conc, crystalstructure='fcc', a=4.1) | ||
| >>> cluster = settings.get_cluster_corresponding_to_cf_name("c1_0") | ||
| >>> cluster.size | ||
| 1 | ||
| """ | ||
| size = get_size_from_cf_name(cf_name) | ||
| for cluster in self.cluster_list.clusters: | ||
| # This CF name may contain the decoration number, but the cluster name doesn't. | ||
| # Decoration number is last, so test everything before that is equal. | ||
| if cluster.size == size and cf_name.startswith(cluster.name): | ||
| return cluster | ||
| raise RuntimeError(f"Didn't find cluster corresponding to name: {cf_name}") | ||
| def _get_concentration(concentration: Union[Concentration, dict]) -> Concentration: | ||
@@ -745,0 +771,0 @@ """Helper function to format the concentration""" |
+31
-18
@@ -328,7 +328,14 @@ # pylint: disable=too-many-lines | ||
| index_mask[j] = 1 | ||
| train_mask = index_mask == 0 | ||
| validate_mask = index_mask == 1 | ||
| train_index = np.arange(len(y))[train_mask] | ||
| validate_index = np.arange(len(y))[validate_mask] | ||
| data = { | ||
| "train_X": X[index_mask == 0, :], | ||
| "train_y": y[index_mask == 0], | ||
| "validate_X": X[index_mask == 1, :], | ||
| "validate_y": y[index_mask == 1], | ||
| "train_X": X[train_mask, :], | ||
| "train_y": y[train_mask], | ||
| "train_index": train_index, | ||
| "validate_X": X[validate_mask, :], | ||
| "validate_y": y[validate_mask], | ||
| "validate_index": validate_index, | ||
| } | ||
@@ -903,2 +910,18 @@ partitions.append(data) | ||
| def get_diameter_from_cf_name(cf_name: str) -> int: | ||
| """Extract the diameter group from the name. | ||
| Example: "c2_d0001_0_00" returns 1.""" | ||
| size = get_size_from_cf_name(cf_name) | ||
| if size in {0, 1}: | ||
| # 0- and 1-body clusters have a slightly different convention. | ||
| return 0 | ||
| dia_str = cf_name.split("_")[1] | ||
| # The diameter delimiter should start with a "d". | ||
| if not dia_str.startswith("d"): | ||
| raise ValueError( | ||
| f"Diameter format looks incorrect for cf name '{cf_name}'. Found '{dia_str}'." | ||
| ) | ||
| return int(dia_str[1:]) | ||
| def sort_cf_names(cf_names: tIterable[str]) -> List[str]: | ||
@@ -913,17 +936,7 @@ """ | ||
| """ | ||
| sizes = [get_size_from_cf_name(n) for n in cf_names] | ||
| # Regular expression that extracts all digits after the occurrence | ||
| # of _d (e.g. c2_d0001_0_00 --> 0001) | ||
| prog = re.compile("_d(\\d+)") | ||
| dia_str = [prog.findall(n) for n in cf_names] | ||
| dia = [] | ||
| for d in dia_str: | ||
| if d: | ||
| dia.append(int(d[0])) | ||
| else: | ||
| dia.append(0) | ||
| # Helper sorting function to define the order of the sorting. | ||
| def _sort_ordering(name: str): | ||
| return get_size_from_cf_name(name), get_diameter_from_cf_name(name), name | ||
| sort_obj = list(zip(sizes, dia, cf_names)) | ||
| sort_obj.sort() | ||
| return [s[-1] for s in sort_obj] | ||
| return sorted(cf_names, key=_sort_ordering) | ||
@@ -930,0 +943,0 @@ |
@@ -7,2 +7,11 @@ .. _releasenotes: | ||
| 1.0.3 | ||
| ====== | ||
| * Getting thermodynamic quantities in the SGC MC now also retrieves averages from observers. | ||
| * Added `interactive` option to :py:func:`~clease.plot_post_process.plot_eci`. | ||
| * Added :meth:`~clease.settings.ClusterExpansionSettings.get_cluster_corresponding_to_cf_name`. | ||
| * Minor performance improvements to SGC MC. | ||
| * Added :meth:`~clease.evaluate.Evaluate.set_normalization` for choosing which elements the energies are normalized by. | ||
| The default is to normalize by every atom. | ||
| 1.0.2 | ||
@@ -9,0 +18,0 @@ ====== |
+1
-1
| Metadata-Version: 2.1 | ||
| Name: clease | ||
| Version: 1.0.2 | ||
| Version: 1.0.3 | ||
| Summary: CLuster Expansion in Atomistic Simulation Environment | ||
@@ -5,0 +5,0 @@ Home-page: https://gitlab.com/computationalmaterials/clease/ |
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
1604436
0.51%16851
0.67%