GOFEE

GOFEE builds all operators from scratch because the implementation in ase.ga is more time consuming

Fingerprint

Radial features $$ F_{AB}(r) \propto \begin{cases} \sum_{i,j} \frac{1}{r_{ij}^2} \text{exp}(-\dfrac{(r-r_{ij})^2}{2 \sigma_\text{radial}^2}) & R < r_\text{cut_radial} \ 0 & R \ge r_\text{cut_radial} \end{cases} $$

Angular features $$ F_{ABC}(\theta) \propto \sum_{i,j,k} f_c(r_{ij}) f_c(r_{ik}) \text{exp}(-\dfrac{(\theta-\theta_{ijk})^2}{2 \sigma_\text{angular}^2}) $$

The cutoff function is defined as $$ f_c(r) = 1 + \gamma (\frac{r}{r_\text{cut_angular}})^{\gamma + 1} - (\gamma + 1) (\frac{r}{r_\text{cut_angular}})^{\gamma} $$

If there are only two elements (saying carbon and hydrogen) in a periodic system, then the feature of the entire structure will be $$ F_\text{global} = [F_{HH}, F_{HC}, F_{CC}, \eta F_{HHH}, \eta F_{HHC}, \eta F_{HCC}, \eta F_{CHH}, \eta F_{CHC}, \eta F_{CCC}] $$ The nubmer of bins detemines the deminsion of each entry in the above equation.

If the global feature of a molecule is requested, the number of available atoms will be checked. For example, the global feature of C2H4 is computed as $$ F_\text{global} (\text{C2H4}) = [F_{HH}, F_{HC}, F_{CC}, \eta F_{HHH}, \eta F_{HHC}, \eta F_{HCC}, \eta F_{CHH}, \eta F_{CHC}] $$ It is impossible to have $F_{CCC}$ since there are only 2 carbon atoms.

In the Cython's implementation

# radial features
for i in n_atoms:
  for idx_cell in cell_displacements:
    for j in n_atoms:
      r_ij
      for idx_bin in bins:
        radial_feature(r_ij, idx_bin)

# angular features
for i in n_atoms:
  for idx_cell_1 in cell_displacements_1:
    for j in n_atoms:
      for idx_cell_2 in cell_displacements_2:
        for k in n_atoms_k:
          r_ij
          r_ik
          angle_ijk
          for idx_bin in bins:
            angular_feature(r_ij, r_ik, angle_ijk, idx_bin)

An example of jax version for radial features

def gaussian_cdf(x: float, mu: float, sigma: float) -> float:
    '''
        Calculate gaussian cumulative distribution function 
            from lower bound -inf to upper bound `x` 
    '''
    cdf_x = 0.5 * (1 + jax.scipy.special.erf((x-mu)/sigma/jnp.sqrt(2)))
    return cdf_x

@partial(jax.vmap, in_axes=(None, 0, None), out_axes=0)
def gaussian_cdf_matrix(*args):
    '''
        return: shape (n_mu, n_x)
    '''
    return gaussian_cdf(*args)

@partial(jax.jit, static_argnums=[6, 7, 8, 9, 10])
def compute_radial_features(
        R: jax.Array,
        idx_i: jax.Array,
        idx_j: jax.Array,
        idx_pair: jax.Array,
        cell_shift: jax.Array,
        cell: jax.Array,
        n_unique_pair: int,
        r_cut: float,
        n_bins: int,
        sigma: float,
        prefactor: float,
    ) -> jax.Array:

    R_ij = R[idx_i] - R[idx_j] + cell_shift.dot(cell)
    r_ij = jnp.linalg.norm(R_ij, axis=1, keepdims=True)

    x = jnp.linspace(0, r_cut, n_bins+1)

    cdf = gaussian_cdf_matrix(x, r_ij, sigma)
    cdf *= sigma * jnp.sqrt(2*jnp.pi)

    cdf *= 1 / r_ij**2

    integral_in_bins = cdf[:, 1:] - cdf[:, :-1]

    radial_features = jnp.zeros((n_unique_pair, n_bins))
    radial_features = radial_features.at[idx_pair].add(integral_in_bins)
    radial_features = 0.5 * radial_features.flatten() 

    radial_features *= prefactor

    return radial_features

@partial(jax.jit, static_argnums=[6, 7, 8, 9, 10])
@jax.jacfwd
def compute_radial_feature_grad(*args):
    return compute_radial_features(*args)

install

env MPICC=/comm/swstack/core/intel-2019.5.281/mpich/3.2/bin/mpicc pip install mpi4py==3.0.1
pip install cymem==1.31.2

Candidate Generator

CrossOver acts on both parents while Mutation is only valid on parents[0].

    def get_new_candidate(self, parents):
        """Generate new candidate by applying a randomly drawn
        operation on the structures. This is done successively for
        each list of operations, if multiple are present.
        """
        for op_list, rho_list in zip(self.operations, self.rho):
            for i_trial in range(5): # Do five trials
                to_use = self.__get_index__(rho_list)
                anew = op_list[to_use].get_new_candidate(parents)
                if anew is not None:
                    parents[0] = anew # anew will be used in next set of operations
                    break
            else:
                anew = parents[0] # use old parents[0] if the operation failed
                anew = op_list[to_use].finalize(anew, successfull=False)
        return anew

Errors:

Wrong population size

Traceback (most recent call last):
  File "calc_gofee.py", line 62, in <module>
    search.run()
  File "/home/tang/project/constrained-GOFEE/src/gofee/gofee.py", line 295, in run
    unrelaxed_candidates = self.generate_new_candidates()
  File "/home/tang/project/constrained-GOFEE/src/gofee/gofee.py", line 372, in generate_new_candidates
    candidates = parallel_function_eval(self.comm, func1)
  File "/home/tang/project/constrained-GOFEE/src/gofee/parallel_utils.py", line 21, in parallel_function_eval
    results = func()
  File "/home/tang/project/constrained-GOFEE/src/gofee/gofee.py", line 371, in func1
    return [self.generate_candidate() for i in task_split[self.comm.rank]]
  File "/home/tang/project/constrained-GOFEE/src/gofee/gofee.py", line 371, in <listcomp>
    return [self.generate_candidate() for i in task_split[self.comm.rank]]
  File "/home/tang/project/constrained-GOFEE/src/gofee/gofee.py", line 394, in generate_candidate
    parents = self.population.get_structure_pair()
  File "/home/tang/project/constrained-GOFEE/src/gofee/population.py", line 97, in get_structure_pair
    t1, t2 = np.random.permutation(len(self.pop))[:2]
ValueError: not enough values to unpack (expected 2, got 1)

get_structure_pair() in gofee.population is defined as:

def get_structure_pair(self):
    t1, t2 = np.random.permutation(len(self.pop))[:2]
    structure_pair = [self.pop[t1].copy(), self.pop[t2].copy()]
    return structure_pair

A possible solution to get rid of the case where the population only has one structure is that

def get_structure_pair(self):
    try:
        t1, t2 = np.random.permutation(len(self.pop))[:2]
    except ValueError:
        t1 = 0
        t2 = 0
        print('len(self.pop): {}'.format(len(self.pop)))
        print(self.pop)
    structure_pair = [self.pop[t1].copy(), self.pop[t2].copy()]
    return structure_pair