import hashlib
import numpy as np
from scipy import sparse
from ..base_descriptor import BaseDescriptor
from ..constants import ATOM_SYMBOL_TO_INDEX_DICT
from ..util import _gen_2Darray_for_ffi, list_symbols_to_indices
from ._libsymf import ffi, lib
from .descriptor_set import GaussianDescriptorSet
# Short alias for GaussianDescriptorSet. Referencing the imported name here also
# keeps Flake8 from rejecting the commit, and gives callers an easy-access acronym.
GDS = GaussianDescriptorSet
[docs]class Gaussian(BaseDescriptor):
"""
Fingerprint calculation using Gaussian symmetry functions.
Args:
Gs [dict] : a dictionary containing the definition of the G2, G4 and G5 parameters (keyed by element symbol or "default"), or a GaussianDescriptorSet object.
elements [list] : the chemical element symbols (strings) present in the system.
cutoff_func [str] : defines the form of f_c, either "cosine" or "polynomial". Defaults to "cosine".
gamma [float] : parameter for the polynomial cutoff function; must be > 0 when cutoff_func is "polynomial". Defaults to None because the default cutoff function is cosine.
"""
def __init__(self, Gs, elements, cutoff_func="cosine", gamma=None):
    """
    Store the descriptor configuration, then precompute the cffi parameter
    arrays and the setup hash.

    Args:
        Gs : symmetry-function definition; either a dict of parameters or an
            object exposing `descriptor_setup` / `descriptor_setup_hash`.
        elements : iterable of chemical element symbols; stored as a list.
        cutoff_func [str] : "cosine" or "polynomial" (compared case-insensitively).
        gamma [float] : must be given and > 0 when the cutoff is "polynomial".
    Raises:
        ValueError : for an unknown cutoff function, or a missing or
            non-positive gamma with the polynomial cutoff.
    """
    super().__init__()
    self.descriptor_type = "Gaussian"
    self.Gs = Gs
    self.elements = list(elements)

    cutoff_name = cutoff_func.lower()
    self.cutoff_func = cutoff_name
    if cutoff_name not in ("cosine", "polynomial"):
        raise ValueError('cutoff function must be either "cosine" or "polynomial"')

    # gamma is only validated for the polynomial cutoff; it is stored either way.
    if cutoff_name == "polynomial":
        if gamma is None:
            raise ValueError(
                "polynomial cutoff function requires float value > 0. of `gamma`"
            )
        if gamma <= 0.0:
            raise ValueError("polynomial cutoff function gamma must be > 0.")
    self.gamma = gamma

    self.element_indices = list_symbols_to_indices(elements)
    # Order matters: the hash is derived from the prepared descriptor setup.
    self.prepare_descriptor_parameters()
    self.get_descriptor_setup_hash()
def __eq__(self, other):
    """
    Compare two descriptors by configuration rather than identity.

    Two descriptors are equal when they share the descriptor type, element
    list, cutoff function (and gamma, for the polynomial cutoff) and the
    same symmetry-function parameters. The parameter comparison is
    symmetric: an element, G2/G4/G5 section or parameter key present on
    only one side makes the descriptors unequal instead of being ignored
    or raising KeyError.

    Returns:
        bool, or NotImplemented when `other` is not a BaseDescriptor.
    """
    if not isinstance(other, BaseDescriptor):
        return NotImplemented
    if self.descriptor_type != other.descriptor_type:
        return False
    if self.elements != other.elements:
        return False
    if self.cutoff_func != other.cutoff_func:
        return False
    # gamma only influences the polynomial cutoff, so it is ignored for cosine.
    if self.cutoff_func == "polynomial" and self.gamma != getattr(
        other, "gamma", None
    ):
        return False

    own_gs, other_gs = self.Gs, other.Gs
    if not (isinstance(own_gs, dict) and isinstance(other_gs, dict)):
        # Gs may be a descriptor-set object rather than a dict; such objects
        # cannot be walked element by element here, so fall back to the
        # precomputed setup hash.
        # NOTE(review): assumes the hash fully identifies the setup - confirm.
        own_hash = getattr(self, "descriptor_setup_hash", None)
        return own_hash is not None and own_hash == getattr(
            other, "descriptor_setup_hash", None
        )

    if own_gs.keys() != other_gs.keys():
        return False
    for element, own in own_gs.items():
        theirs = other_gs[element]
        if own["cutoff"] != theirs["cutoff"]:
            return False
        for group in ("G2", "G4", "G5"):
            if (group in own) != (group in theirs):
                return False
            if group not in own:
                continue
            if own[group].keys() != theirs[group].keys():
                return False
            for key, values in own[group].items():
                # list() lets lists, tuples and arrays compare by content.
                if list(values) != list(theirs[group][key]):
                    return False
    return True
def prepare_descriptor_parameters(self):
    """
    A helper function to prepare the parameters as input to cffi.

    Fills `self.descriptor_setup` (one parameter table per element symbol)
    and `self.params_set` (keyed by the element's index from
    ATOM_SYMBOL_TO_INDEX_DICT), which holds the integer columns ("i"), the
    float columns ("d"), their cffi pointer wrappers ("ip", "dp"), the
    recombined table ("total") and the row count ("num").

    Raises:
        NotImplementedError : Gs is a dict with neither an entry for an
            element nor a "default" entry.
        ValueError : Gs is neither a dict nor an object with a
            `descriptor_setup` attribute.
    """
    gs = self.Gs
    if isinstance(gs, dict):
        self.descriptor_setup = {}
        for symbol in self.elements:
            # An element-specific definition wins over the "default" one.
            if symbol in gs:
                symbol_gs = gs[symbol]
            elif "default" in gs:
                symbol_gs = gs["default"]
            else:
                raise NotImplementedError(
                    "Symmetry function parameters not defined properly"
                )
            self.descriptor_setup[symbol] = self._prepare_descriptor_parameters_element(
                symbol_gs, self.element_indices
            )
    elif hasattr(gs, "descriptor_setup"):
        self.descriptor_setup = gs.descriptor_setup
    else:
        raise ValueError(
            "Gs must be a dict with descriptor params or a GaussianDescriptorSet object: passed was a (%s)"
            % type(gs)
        )

    self.params_set = {}
    for symbol in self.elements:
        atomic_index = ATOM_SYMBOL_TO_INDEX_DICT[symbol]
        entry = {}
        self.params_set[atomic_index] = entry

        table = self.descriptor_setup[symbol]
        # The first three columns are cast to C ints, the rest to doubles.
        int_columns = np.asarray(table[:, :3].copy(), dtype=np.intc, order="C")
        float_columns = np.asarray(table[:, 3:].copy(), dtype=np.float64, order="C")

        entry["i"] = int_columns
        entry["d"] = float_columns
        entry["ip"] = _gen_2Darray_for_ffi(int_columns, ffi, "int")
        entry["dp"] = _gen_2Darray_for_ffi(float_columns, ffi)
        entry["total"] = np.concatenate((int_columns, float_columns), axis=1)
        entry["num"] = len(table)
def _prepare_descriptor_parameters_element(self, Gs, element_indices):
    """
    Expand one element's symmetry-function definition into a parameter table.

    Each row is (type, element_i, element_j, cutoff, eta, p5, p6):
      * G2 rows: (2, neighbor, 0, cutoff, eta / cutoff**2, rs, 0.0)
      * G4 rows: (4, el_i, el_j, cutoff, eta / cutoff**2, zeta, gamma)
      * G5 rows: (5, el_i, el_j, cutoff, eta, zeta, gamma)
    G4/G5 rows cover every unordered pair of `element_indices`, including a
    pair of the same element. Duplicate rows are dropped, each group is
    sorted, and the groups are concatenated in G2, G4, G5 order.

    NOTE(review): G5 etas are used as given, without the cutoff**2 scaling
    applied to G2 and G4 - confirm this is intentional.

    Args:
        Gs [dict] : must contain "cutoff"; may contain "G2", "G4", "G5".
        element_indices : indexable sequence of element indices.
    Returns:
        numpy array built from the rows (empty when no group is defined).
    """
    cutoff = Gs["cutoff"]

    def angular_rows(kind, etas, spec):
        # Shared expansion for the three-body (G4 / G5) functions.
        return {
            (kind, element_indices[a], element_indices[b], cutoff, eta, zeta, gamma)
            for eta in etas
            for zeta in spec["zetas"]
            for gamma in spec["gammas"]
            for a in range(len(element_indices))
            for b in range(a, len(element_indices))
        }

    radial = set()
    if "G2" in Gs:
        for eta in np.array(Gs["G2"]["etas"]) / cutoff**2:
            for rs in Gs["G2"]["rs_s"]:
                for neighbor in element_indices:
                    radial.add((2, neighbor, 0, cutoff, eta, rs, 0.0))

    angular_g4 = set()
    if "G4" in Gs:
        angular_g4 = angular_rows(
            4, np.array(Gs["G4"]["etas"]) / cutoff**2, Gs["G4"]
        )

    angular_g5 = set()
    if "G5" in Gs:
        angular_g5 = angular_rows(5, Gs["G5"]["etas"], Gs["G5"])

    ordered = sorted(radial) + sorted(angular_g4) + sorted(angular_g5)
    return np.array([list(row) for row in ordered])
def get_descriptor_setup_hash(self):
    """
    Compute and store `self.descriptor_setup_hash`.

    For a dict `Gs` the hash is the MD5 hex digest of a string made of the
    cutoff name (plus gamma for the polynomial cutoff) followed, for each
    element in `self.descriptor_setup`, by the element key and every
    parameter value formatted with "%.15f". Otherwise the hash is copied
    from `Gs.descriptor_setup_hash`.

    Raises:
        ValueError : Gs is neither a dict nor an object carrying a
            `descriptor_setup_hash` attribute.
    """
    if isinstance(self.Gs, dict):
        if self.cutoff_func == "cosine":
            pieces = ["cosine"]
        else:
            pieces = ["polynomial%.15f" % self.gamma]
        for symbol, rows in self.descriptor_setup.items():
            pieces.append(symbol)
            pieces.extend("%.15f" % value for row in rows for value in row)
        digest = hashlib.md5("".join(pieces).encode("utf-8")).hexdigest()
        self.descriptor_setup_hash = digest
    elif hasattr(self.Gs, "descriptor_setup_hash"):
        self.descriptor_setup_hash = self.Gs.descriptor_setup_hash
    else:
        raise ValueError(
            "Gs must be a dict with descriptor params or a GaussianDescriptorSet object: passed was a (%s)"
            % type(self.Gs)
        )
def save_descriptor_setup(self, filename):
    """
    Write the descriptor setup to a text file.

    For every element in `self.descriptor_setup` a header with the element
    key and its number of descriptors is written, followed by one
    tab-separated line per descriptor: the first three values as integers
    and the remaining four as stored.

    Args:
        filename : path of the file to create or overwrite.
    """
    header_template = "===========\nElement: {} \t num_desc: {}\n"
    row_template = "{}\t{}\t{}\t{}\t{}\t{}\t{}\n"
    with open(filename, "w") as handle:
        for symbol, rows in self.descriptor_setup.items():
            handle.write(header_template.format(symbol, len(rows)))
            for row in rows:
                line = row_template.format(
                    int(row[0]),
                    int(row[1]),
                    int(row[2]),
                    row[3],
                    row[4],
                    row[5],
                    row[6],
                )
                handle.write(line)
def calculate_fingerprints(self, atoms, element, calc_derivatives, log):
    """
    Interfacing with cffi to compute the fingerprints.

    Args:
        atoms : structure object providing get_chemical_symbols(),
            get_positions(wrap=True), get_scaled_positions(wrap=True),
            get_pbc() and a `cell` attribute.
        element [str] : symbol of the element whose atoms are fingerprinted.
        calc_derivatives [bool] : also compute the fingerprint derivatives.
        log : not used by this method.
    Returns:
        (size_info, fp, data, row, col, shape) where size_info is
        [number of atoms, number of fingerprinted atoms, number of descriptors],
        fp is the fingerprint array, and data/row/col/shape describe the
        derivative matrix in COO sparse form. The last four entries are
        None when `calc_derivatives` is false.
    Raises:
        KeyError : `element` does not occur in `atoms`.
        NotImplementedError : the C routine returned error code 1.
    """
    element_index = ATOM_SYMBOL_TO_INDEX_DICT[element]
    symbols = np.array(atoms.get_chemical_symbols())
    atom_num = len(symbols)
    atom_indices = list_symbols_to_indices(symbols)

    # Positions (in ascending order) of the atoms of the requested element.
    element_mask = atom_indices == element_index
    if not np.any(element_mask):
        raise KeyError(element_index)
    cal_atoms = np.asarray(
        np.arange(atom_num)[element_mask], dtype=np.intc, order="C"
    )
    cal_num = len(cal_atoms)

    params = self.params_set[element_index]
    num_desc = params["num"]
    size_info = np.array([atom_num, cal_num, num_desc])

    # Every array handed to C stays bound to a local name until the call
    # returns, so the memory behind the raw pointers remains alive.
    atom_indices_p = ffi.cast("int *", atom_indices.ctypes.data)
    cart = np.copy(atoms.get_positions(wrap=True), order="C")
    scale = np.copy(atoms.get_scaled_positions(wrap=True), order="C")
    cell = np.copy(atoms.cell, order="C")
    pbc = np.copy(atoms.get_pbc()).astype(np.intc)
    cart_p = _gen_2Darray_for_ffi(cart, ffi)
    scale_p = _gen_2Darray_for_ffi(scale, ffi)
    cell_p = _gen_2Darray_for_ffi(cell, ffi)
    pbc_p = ffi.cast("int *", pbc.ctypes.data)
    cal_atoms_p = ffi.cast("int *", cal_atoms.ctypes.data)

    # Leading arguments shared by all four C entry points.
    common_args = (
        cell_p,
        cart_p,
        scale_p,
        pbc_p,
        atom_indices_p,
        atom_num,
        cal_atoms_p,
        cal_num,
        params["ip"],
        params["dp"],
        num_desc,
    )

    x = np.zeros([cal_num, num_desc], dtype=np.float64, order="C")
    x_p = _gen_2Darray_for_ffi(x, ffi)
    use_cosine = self.cutoff_func == "cosine"

    if calc_derivatives:
        dx = np.zeros(
            [cal_num * num_desc, atom_num * 3], dtype=np.float64, order="C"
        )
        dx_p = _gen_2Darray_for_ffi(dx, ffi)
        if use_cosine:
            errno = lib.calculate_sf_cos(*common_args, x_p, dx_p)
        else:
            errno = lib.calculate_sf_poly(*common_args, x_p, dx_p, self.gamma)
    elif use_cosine:
        errno = lib.calculate_sf_cos_noderiv(*common_args, x_p)
    else:
        # No derivative buffer exists on this path; the previous code passed
        # an undefined `dx_p` here and raised NameError.
        # NOTE(review): assumes calculate_sf_poly_noderiv mirrors
        # calculate_sf_cos_noderiv plus a trailing gamma - confirm against
        # the cffi cdef.
        errno = lib.calculate_sf_poly_noderiv(*common_args, x_p, self.gamma)

    if errno == 1:
        raise NotImplementedError("Descriptor not implemented!")

    if not calc_derivatives:
        return size_info, x, None, None, None, None

    # Only the non-zero derivative entries are returned.
    sparse_dx = sparse.coo_matrix(dx)
    return (
        size_info,
        x,
        sparse_dx.data,
        sparse_dx.row,
        sparse_dx.col,
        np.array(dx.shape),
    )