Source code for amptorch.descriptor.GMPOrderNorm

import hashlib

import numpy as np
from scipy import sparse

from ..base_descriptor import BaseDescriptor
from ..constants import ATOM_SYMBOL_TO_INDEX_DICT
from ..util import _gen_2Darray_for_ffi, list_symbols_to_indices
from ._libgmpordernorm import ffi, lib


[docs]class GMPOrderNorm(BaseDescriptor): """ Fingerprinting calculation for GMP with normalization over orders. Args: MCSHs [dict] : a dictionary containing the parameters for MCSH definition. elements [dict] : a dictionary of string of chemical elements in the system. """ def __init__( self, MCSHs, elements, # mode="atom-centered", ): super().__init__() self.descriptor_type = "GMPOrderNorm" self.MCSHs = MCSHs self.elements = elements self.element_indices = list_symbols_to_indices(elements) if "cutoff" not in self.MCSHs: self.default_cutoff() self.prepare_descriptor_parameters() self.get_descriptor_setup_hash() def __eq__(self, other): """Overrides the default implementation""" if isinstance(other, BaseDescriptor): if self.descriptor_type != other.descriptor_type: return False if self.descriptor_setup_hash != other.descriptor_setup_hash: return False return True return NotImplemented
[docs] def default_cutoff(self, threshold=1e-8): max_sigma = 0.0 if "MCSHs_detailed_list" in self.MCSHs: for detail_setup in self.MCSHs["MCSHs_detailed_list"]: sigmas = detail_setup["sigmas"] max_sigma = max(max(sigmas), max_sigma) else: sigmas = self.MCSHs["MCSHs"]["sigmas"] max_sigma = max(max(sigmas), max_sigma) A = 1.0 / (max_sigma * np.sqrt(2.0 * np.pi)) alpha = 1.0 / (2.0 * max_sigma * max_sigma) cutoff = round(np.sqrt(np.log(threshold / A) / (-alpha)), 2) print("default cutoff distance: {} A".format(cutoff)) self.MCSHs["cutoff"] = cutoff return
[docs] def prepare_descriptor_parameters(self): descriptor_setup = [] cutoff = self.MCSHs["cutoff"] self.solid_harmonic = self.MCSHs.get("solid_harmonics", True) solid_harmonic_i = 1 if self.solid_harmonic else 0 square = self.MCSHs.get("square", True) square_i = 1 if square else 0 rs_setup = self.MCSHs.get("rs_setup", {"setup": "constant", "rs": 1.0}) if rs_setup["setup"] == "scale": rs_scale = rs_setup["scale_factor"] elif rs_setup["setup"] == "constant": rs_scale = -1.0 * rs_setup["rs"] else: rs_scale = -1.0 if "MCSHs_detailed_list" in self.MCSHs: for detail_setup in self.MCSHs["MCSHs_detailed_list"]: sigmas = detail_setup["sigmas"] descriptor_setup += [ [ detail_setup["order"], square_i, solid_harmonic_i, sigma, 1.0, (1.0 / (sigma * np.sqrt(2.0 * np.pi))) ** 3, 1.0 / (2.0 * sigma * sigma), cutoff, (1.0 / (rs_scale * sigma)) if rs_scale > 0 else (1.0 / (-1.0 * rs_scale)), ] for sigma in sigmas ] else: for i in self.MCSHs["MCSHs"]["orders"]: descriptor_setup += [ [ i, square_i, solid_harmonic_i, sigma, 1.0, (1.0 / (sigma * np.sqrt(2.0 * np.pi))) ** 3, 1.0 / (2.0 * sigma * sigma), cutoff, (1.0 / (rs_scale * sigma)) if rs_scale > 0 else (1.0 / (-1.0 * rs_scale)), ] for sigma in self.MCSHs["MCSHs"]["sigmas"] ] self.descriptor_setup = np.array(descriptor_setup) # print(self.descriptor_setup) # load pseudo-densities if not provided if not self.MCSHs.get("atom_gaussians", None): self.load_pseudo_densities(self.elements) atomic_gaussian_setup = {} for element in self.elements: params = list() # count = 0 filename = self.MCSHs["atom_gaussians"][element] with open(filename, "r") as fil: for line in fil: tmp = line.split() params += [float(tmp[0]), float(tmp[1])] # count += 1 element_index = ATOM_SYMBOL_TO_INDEX_DICT[element] params = np.asarray(params, dtype=np.float64, order="C") atomic_gaussian_setup[element_index] = params self.atomic_gaussian_setup = atomic_gaussian_setup max_gaussian_count = 0 ngaussian_list = list() self.params_set = dict() for element_index in self.element_indices: self.params_set[element_index] = dict() self.params_set[element_index][ "gaussian_params" ] = self.atomic_gaussian_setup[element_index] self.params_set[element_index]["gaussian_count"] = int( len(self.atomic_gaussian_setup[element_index]) / 2 ) ngaussian_list.append(self.params_set[element_index]["gaussian_count"]) # print("self.params_set[element_index]: {}".format(self.params_set[element_index])) # print("ngaussian_list: {}".format(ngaussian_list)) ngaussian_list = np.asarray(ngaussian_list, dtype=np.intc, order="C") max_gaussian_count = np.max(ngaussian_list) overall_gaussian_params = list() for element_index in self.element_indices: temp = np.zeros(max_gaussian_count * 2) temp[ : self.params_set[element_index]["gaussian_count"] * 2 ] = self.params_set[element_index]["gaussian_params"] overall_gaussian_params.append(temp) element_index_to_order_list = np.zeros(120, dtype=np.intc) for i, element_index in enumerate(self.element_indices): # element_index_to_order_list.append(element_index) element_index_to_order_list[element_index] = i # element_index_to_order_list = np.asarray(element_index_to_order_list, dtype=np.intc, order='C') overall_gaussian_params = np.asarray( overall_gaussian_params, dtype=np.float64, order="C" ) self.params_set["ngaussians"] = ngaussian_list self.params_set["ngaussians_p"] = ffi.cast("int *", ngaussian_list.ctypes.data) self.params_set["gaussian_params"] = overall_gaussian_params self.params_set["gaussian_params_p"] = _gen_2Darray_for_ffi( overall_gaussian_params, ffi ) self.params_set["element_index_to_order"] = element_index_to_order_list self.params_set["element_index_to_order_p"] = ffi.cast( "int *", element_index_to_order_list.ctypes.data ) # print("ngaussians: {}".format(self.params_set["ngaussians"])) # print("gaussian_params: {}".format(self.params_set["gaussian_params"])) params_i = np.asarray( self.descriptor_setup[:, :3].copy(), dtype=np.intc, order="C" ) params_d = np.asarray( self.descriptor_setup[:, 3:].copy(), dtype=np.float64, order="C" ) self.params_set["i"] = params_i self.params_set["d"] = params_d self.params_set["ip"] = _gen_2Darray_for_ffi(self.params_set["i"], ffi, "int") self.params_set["dp"] = _gen_2Darray_for_ffi(self.params_set["d"], ffi) self.params_set["total"] = np.concatenate( (self.params_set["i"], self.params_set["d"]), axis=1 ) self.params_set["num"] = len(self.params_set["total"]) if "prime_threshold" in self.MCSHs: self.params_set["prime_threshold"] = float(self.MCSHs["prime_threshold"]) self.params_set["log"] = self.MCSHs.get("log", False) return
[docs] def load_pseudo_densities(self, elements): import os # get absolute path to the package pkg_path = os.path.dirname(__file__) psd_path = os.path.join(pkg_path, "../utils/pseudodensity_psp_v3") res = {} for element in elements: res[element] = os.path.join(psd_path, f"{element}_pseudodensity.g") self.MCSHs["atom_gaussians"] = res
[docs] def get_descriptor_setup_hash(self): # set self.descriptor_setup_hash string = "" for desc in self.descriptor_setup: for num in desc: string += "%.15f" % num md5 = hashlib.md5(string.encode("utf-8")) hash_result = md5.hexdigest() self.descriptor_setup_hash = hash_result
[docs] def save_descriptor_setup(self, filename): with open(filename, "w") as out_file: for desc in self.descriptor_setup: out_file.write( "{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format( int(desc[0]), int(desc[1]), desc[2], desc[3], desc[4], desc[5], desc[6], ) )
[docs] def calculate_fingerprints(self, atoms, element, calc_derivatives, log): element_index = ATOM_SYMBOL_TO_INDEX_DICT[element] symbols = np.array(atoms.get_chemical_symbols()) atom_num = len(symbols) atom_indices = list_symbols_to_indices(symbols) unique_atom_indices = np.unique(atom_indices) type_num = dict() type_idx = dict() for atom_index in unique_atom_indices: tmp = atom_indices == atom_index type_num[atom_index] = np.sum(tmp).astype(np.int64) # if atom indexs are sorted by atom type, # indexs are sorted in this part. # if not, it could generate bug in training process for force training type_idx[atom_index] = np.arange(atom_num)[tmp] atom_indices_p = ffi.cast("int *", atom_indices.ctypes.data) cart = np.copy(atoms.get_positions(wrap=True), order="C") scale = np.copy(atoms.get_scaled_positions(wrap=True), order="C") cell = np.copy(atoms.cell, order="C") pbc = np.copy(atoms.get_pbc()).astype(np.intc) cart_p = _gen_2Darray_for_ffi(cart, ffi) scale_p = _gen_2Darray_for_ffi(scale, ffi) cell_p = _gen_2Darray_for_ffi(cell, ffi) pbc_p = ffi.cast("int *", pbc.ctypes.data) cal_atoms = np.asarray(type_idx[element_index], dtype=np.intc, order="C") cal_num = len(cal_atoms) # print("calculate atom length: {}\ttotal:{}".format(cal_num, atom_num)) cal_atoms_p = ffi.cast("int *", cal_atoms.ctypes.data) # print(self.params_set['i']) # print(self.params_set['d']) # print(self.params_set['gaussian_params']) # print(self.params_set['ngaussians']) # print(self.params_set['element_index_to_order']) # print(self.params_set['num']) # print(atom_indices) size_info = np.array([atom_num, cal_num, self.params_set["num"]]) if calc_derivatives: x = np.zeros([cal_num, self.params_set["num"]], dtype=np.float64, order="C") dx = np.zeros( [cal_num * self.params_set["num"], atom_num * 3], dtype=np.float64, order="C", ) x_p = _gen_2Darray_for_ffi(x, ffi) dx_p = _gen_2Darray_for_ffi(dx, ffi) if self.solid_harmonic: errno = lib.calculate_solid_gmpordernorm( cell_p, cart_p, scale_p, pbc_p, atom_indices_p, atom_num, cal_atoms_p, cal_num, self.params_set["ip"], self.params_set["dp"], self.params_set["num"], self.params_set["gaussian_params_p"], self.params_set["ngaussians_p"], self.params_set["element_index_to_order_p"], x_p, dx_p, ) else: errno = lib.calculate_gmpordernorm( cell_p, cart_p, scale_p, pbc_p, atom_indices_p, atom_num, cal_atoms_p, cal_num, self.params_set["ip"], self.params_set["dp"], self.params_set["num"], self.params_set["gaussian_params_p"], self.params_set["ngaussians_p"], self.params_set["element_index_to_order_p"], x_p, dx_p, ) if errno == 1: raise NotImplementedError("Descriptor not implemented!") fp = np.array(x, dtype=np.float64) fp_prime = np.array(dx, dtype=np.float64) fp_prime[np.abs(fp_prime) < 1e-5] = 0.0 # threshold = 1e-9 # super_threshold_indices_prime = np.abs(fp_prime) < threshold # print("threshold: {} \tnum points set to zero:{} \t outof: {}".format(threshold, np.sum(super_threshold_indices_prime), fp_prime.shape[0] * fp_prime.shape[1])) # fp_prime[super_threshold_indices_prime] = 0.0 scipy_sparse_fp_prime = sparse.coo_matrix(fp_prime) # print(fp) # print(fp.shape) # print(np.sum(super_threshold_indices)) # print(np.min(np.abs(scipy_sparse_fp_prime.data))) # print("density: {}% \n\n----------------------".format(100*len(scipy_sparse_fp_prime.data) / (fp_prime.shape[0] * fp_prime.shape[1]))) if self.params_set["log"]: raise NotImplementedError return ( size_info, fp, scipy_sparse_fp_prime.data, scipy_sparse_fp_prime.row, scipy_sparse_fp_prime.col, np.array(fp_prime.shape), ) else: x = np.zeros([cal_num, self.params_set["num"]], dtype=np.float64, order="C") x_p = _gen_2Darray_for_ffi(x, ffi) if self.solid_harmonic: errno = lib.calculate_solid_gmpordernorm_noderiv( cell_p, cart_p, scale_p, pbc_p, atom_indices_p, atom_num, cal_atoms_p, cal_num, self.params_set["ip"], self.params_set["dp"], self.params_set["num"], self.params_set["gaussian_params_p"], self.params_set["ngaussians_p"], self.params_set["element_index_to_order_p"], x_p, ) else: errno = lib.calculate_gmpordernorm_noderiv( cell_p, cart_p, scale_p, pbc_p, atom_indices_p, atom_num, cal_atoms_p, cal_num, self.params_set["ip"], self.params_set["dp"], self.params_set["num"], self.params_set["gaussian_params_p"], self.params_set["ngaussians_p"], self.params_set["element_index_to_order_p"], x_p, ) if errno == 1: raise NotImplementedError("Descriptor not implemented!") fp = np.array(x, dtype=np.float64) if self.params_set["log"]: fp = np.abs(fp) fp[fp < 1e-8] = 1e-8 fp = np.log10(fp) return size_info, fp, None, None, None, None