Lennard-Jones equation of state#

We illustrate the usage of pantarei to manage the calculation of the equation of state of a Lennard-Jones fluid.

Setup#

Make sure you have the following packages installed

pantarei (from framagit)
atooms-database
atooms-dynamics
atooms-pp
matplotlib

Basic imports

import os
import random
import numpy
import matplotlib.pyplot as plt

Production#

Simulate the Lennard-Jones fluid along some isotherms. Tag this dataset isotherm to distiguish it from others.

from pantarei import Task, Job

# Sequential
# job = Task(run_md, tag='isotherm')

# Parallel (if scheduler is available on the node)
job = Job(Task(run_md, tag='isotherm'))
for T in [2.0, 3.0]:
    for rho in numpy.arange(0.3, 1.0, 0.1):
        job(rho=round(rho, 12), T=T, N=256, seed=1, rmsd=None, nsteps=40000, timestep=0.002,
            output_path='/tmp/eos/N{N}/T{T}/rho{rho}/seed{seed}')

Analysis#

Browse the pantarei cache and get the simulation data along the isotherms (syntax: function name - tag; can match wildcards; if no tag, just the function name). These are all the entries of the dataset.

from pantarei import pantarei

ds = pantarei.browse('run_md-isotherm')
ds.columns()

F_s(k,t)
F_s_1(k,t)
F_s_2(k,t)
N
P
T
TSP
_dirname
artifacts
backend
berendsen_tau
blocks
concentration
conserved_energy
density
equilibration_dynamics
fix_cm_period
job_end
job_node
job_queue
job_start
md5_hash
model
nsteps
output_path
parallel
path
postprocess
potential_energy
pressure
production_dynamics
production_rate
rho
rmsd
seed
steps
tau(k)
tau_1(k)
tau_2(k)
temperature
thermostat_mass
timestep
unique_seed
verbose
wall_time
x

Print thermodynamic data, sorting first by temperature, then density

ds.pprint(columns=['T', 'rho', 'pressure', 'potential_energy'], sort_by=('T', 'rho'))

T   rho pressure           potential_energy
----------------------------------------------
0 0.3 0.5898617854368878 -1.0069639862394693
0 0.4 0.8233723392708177 -1.3299896189775564
0 0.5 1.1131326522161213 -1.6685100047940105
0 0.6 1.5707431636408715 -1.9817391798883273
0 0.7 2.1742059563044656 -2.301718465392361
0 0.8 3.278132889435198  -2.5668502145490066
0 0.9 5.033272134058294  -2.7667899993267735
0 0.3 1.0088127633114634 -0.9108300375209232
0 0.4 1.4353803075126594 -1.2207187684255687
0 0.5 1.9958267838780035 -1.5150167078096914
0 0.6 2.6816024264639733 -1.817581849144513
0 0.7 3.783999748749068  -2.064617517694943
0 0.8 5.411608963091276  -2.258963353688038
0 0.9 7.905726096178788  -2.355425440614277

More control on formatting by looping over the data entries

fmt = ' | '.join(['{T}', '{rho:.2f}', '{pressure:.4f}'])
for data in ds.find(sort_by=('T', 'rho')):
    print('| ' + fmt.format(**data) + ' |')

Temperature	Density	Pressure
2.0	0.3	0.5899
2.0	0.4	0.8234
2.0	0.5	1.1131
2.0	0.6	1.5707
2.0	0.7	2.1742
2.0	0.8	3.2781
2.0	0.9	5.0333
3.0	0.3	1.0088
3.0	0.4	1.4354
3.0	0.5	1.9958
3.0	0.6	2.6816
3.0	0.7	3.7840
3.0	0.8	5.4116
3.0	0.9	7.9057

Plot the equation of state. Sorting is a bit convoluted this way…

fig, ax = plt.subplots()
for T in ds['T'].unique():
    mask = ds['T'] == T
    rho, Z = ds['rho'], (ds['pressure'] * ds['density']) / ds['T']
    rho, Z = rho[mask], Z[mask]
    # Sort by density
    rho, Z = zip(*sorted(zip(rho, Z), key=lambda _: _[0]))
    ax.plot(rho, Z, '-o', label=f'T={T}')
ax.set_xlabel('Density')
ax.set_ylabel('Compressibility factor')
ax.legend()
fig.show()
fig.savefig('lj_eos.png')

It is simpler by using dataset sorting mechanism

from pantarei import query

fig, ax = plt.subplots()
for T in ds['T'].unique():
    data = ds.find(query.T == T, sort_by='rho')
    rho, Z = data['rho'], (data['pressure'] * data['density']) / data['T']
    ax.plot(rho, Z, '-o', label=f'T={T}')
ax.set_xlabel('Density')
ax.set_ylabel('Compressibility factor')
ax.legend()
fig.show()

Alternatively collect the artifacts in a dataset. We have almost the same info (except information about jobs)

from pantarei import Dataset

ds = Dataset()
ds.insert('/tmp/eos/**/*.yaml')
ds.insert('/tmp/eos/**/*.fskt*')
print(ds.columns())

['F_s(k,t)', 'F_s_1(k,t)', 'F_s_2(k,t)', 'N', 'T', 'TSP', '_dirname', 'backend', 'block', 'block size', 'block steps', 'cell_side', 'cell_volume', 'cm_velocity_norm', 'composition', 'concentration', 'density', 'duration', 'frames', 'grandcanonical', 'md5_hash', 'mean_conserved energy per particle', 'mean_density', 'mean_potential energy per particle', 'mean_pressure', 'mean_temperature', 'mean_total energy per particle', 'model', 'number_of_particles', 'number_of_species', 'path', 'rho', 'rmsd', 'seed', 'size_dispersion', 'species', 'std_conserved energy per particle', 'std_density', 'std_potential energy per particle', 'std_pressure', 'std_temperature', 'std_total energy per particle', 'steps', 'timestep', 'wall_time']

Plot \(F_s(k,t)\). Note it is a 3-dimensional array: (k, t, Fs(k,t))

from pantarei import query

fig, ax = plt.subplots()
for data in ds.find(query.T == 2.0, sort_by='rho'):
    ax.semilogx(*data['F_s_1(k,t)'][1:], label=fr'$\rho$={data["rho"]}')
ax.legend()
ax.set_xlabel('t')
ax.set_ylabel('$F_{s,1}(k,t)$')  # TODO: double subscript in F_s_1 not allowed
ax.legend()
fig.show()
fig.savefig('lj_fskt.png')

Library#

Helper function to make a unique seed how to of

def _unique_seed(seed, **kwargs):
    """Initialize random numbers with an almost unique seed for each parameter combination"""
    import random
    import numpy
    from hashlib import md5

    kwargs['seed'] = seed
    kwargs = {kwargs[key] for key in sorted(kwargs)}
    md5_hash = md5(str(kwargs).encode()).hexdigest()
    _seed = int(md5_hash, 16)
    _seed = int(str(_seed)[:9])
    random.seed(_seed)
    numpy.random.seed(_seed)
    return _seed

Rather elaborate driver for MD simulation. It melts a crystal, equilibrate it, make a production run and finally postprocess the trajectory to measure some correlation functions. The results are returned as a dict and optionally written to disk in the output_path folder.

from atooms.system import System, Thermostat, Barostat
from atooms import database
from atooms.backends.f90 import Interaction, VerletList
from atooms.dynamics import Berendsen, VelocityVerlet, NosePoincare
from atooms.simulation import Simulation, Scheduler, store, write_trajectory, write_thermo, target_rmsd, target_steps
from atooms.trajectory import Trajectory, TrajectoryRam, TrajectoryXYZ
from atooms.simulation.reports import *
import atooms.postprocessing as pp

def run_md(N, T, P=None, rho=None, nsteps=1000000, rmsd=4.0,
        x=0.8, model='lennard_jones', seed=1, output_path=None, parallel=False,
        production_rate=2, equilibration_dynamics="nose-poincare",
        thermostat_mass=5.0, berendsen_tau=100.0, timestep=0.004, postprocess=None,
        fix_cm_period=20000, blocks=200, verbose=False):
    """
    Run a simulation with atooms-dynamics store and return a dict of
    post-processed data
    """
    _locals = locals()

    # Defaults
    if postprocess is None: postprocess = ['fskt']
    from atooms.core.utils import setup_logging
    if verbose: setup_logging(level=20)

    # Sanity check
    assert not (P is None and rho is None), 'provide either P or rho'
    if P and rho:
        P = None

    # Create directory to store artifacts
    from atooms.core.utils import mkdir
    if output_path is not None:
        output_path = output_path.format(**_locals)
        mkdir(output_path)

    # Create system of N particles in contact with a thermostat
    # The seed is parametrized with T so that every run is different
    import numpy
    unique_seed = _unique_seed(seed, P=P, rho=rho, T=T, N=N, x=x)
    system = System(N=N)
    system.composition = {'A': round(N*x), 'B': N - round(N*x)}
    system.species_layout = 'F'
    system.temperature = T
    system.density = 1.0
    if rho is not None:
        system.density = rho

    # Specify interaction
    _model = database.model(model)
    system.interaction = Interaction(_model, parallel=parallel, helpers='helpers_3d.f90')
    system.interaction.neighbor_list = VerletList(binning=False, parallel=parallel,
                                                  helpers='helpers_3d.f90')

    # Melt
    system.thermostat = Thermostat(temperature=4.0, relaxation_time=berendsen_tau)
    bck = Berendsen(system, timestep=timestep / 4)
    sim = Simulation(bck, enable_speedometer=verbose)
    sim.run(10000)

    # Simulation backend for equilibration
    assert equilibration_dynamics in ['berendsen', 'nose-poincare']
    if equilibration_dynamics == 'nose-poincare': assert P is None
    if equilibration_dynamics == 'berendsen':
        system.thermostat = Thermostat(temperature=T, relaxation_time=berendsen_tau)
        if P is not None:
            system.barostat = Barostat(pressure=P, relaxation_time=1e4)
        bck = Berendsen(system, timestep=timestep / 2)
    if equilibration_dynamics == 'nose-poincare':
        system.thermostat = Thermostat(temperature=T, mass=thermostat_mass)
        bck = NosePoincare(system, timestep=timestep / 2)
    sim = Simulation(bck, enable_speedometer=verbose)

    # Simulation callbacks
    sim.add(store, 200, ['density', 'pressure', 'temperature',
                         'potential energy per particle',
                         'total energy per particle'])
    if output_path is not None:
        sim.output_path = os.path.join(output_path, 'equilibration')
        sim.add(write_thermo, 1000)
    # Target rmsd, but stop at steps
    if rmsd is not None:
        sim.add(target_rmsd, 20000, rmsd)
    # Fix the CM regularly, this is crucial for long runs with Berendsen
    sim.add(lambda sim: sim.system.fix_momentum(), fix_cm_period)
    sim.add(target_steps, nsteps, nsteps)

    # Equilibration run
    sim.run(nsteps)

    if output_path is not None:
        db = {}
        db.update({'model': model})
        db.update({'steps': sim.current_step})
        db.update(report_system(system))
        db.update(report_simulation(sim))
        db.update(report_stats(sim.data))
        import yaml
        with open(os.path.join(output_path, 'equilibration.yaml'), 'w') as fh:
            yaml.dump(db, fh, sort_keys=False)

    # Production run
    if P is not None:
        # Set density to the average one during the thermalisation run (second half)
        # This way we avoid small fluctuations due to finite size
        half = len(sim.data['density']) // 2
        density = numpy.mean(sim.data['density'][-half:])
        system.density = density

    # Make sure there are alyaws about blocks blocks
    def optimal_block(target_n_blocks):
        for exponent in range(5, 20):
            n_blocks = sim.current_step // 2**exponent
            if n_blocks < target_n_blocks:
                break
        return [0] + [2**i for i in range(exponent)]
    scheduler_ram = Scheduler(block=optimal_block(blocks))
    scheduler_xyz = Scheduler(block=optimal_block(blocks))
    nsteps = int(production_rate * 200 * scheduler_xyz.block[-1])

    system.thermostat = Thermostat(temperature=T, mass=thermostat_mass)
    bck = NosePoincare(system, timestep=timestep)
    sim = Simulation(bck, enable_speedometer=verbose)

    # Store trajectories
    from atooms.trajectory.ram import TrajectoryRamLight
    th = TrajectoryRamLight()
    th.timestep = bck.timestep
    func = lambda sim: sim.backend.conserved_energy / len(sim.system.particle)
    sim.add(write_trajectory, scheduler_ram, trajectory=th)
    sim.add(store, Scheduler(calls=1000), ['density', 'pressure', 'temperature',
                                           'potential energy per particle',
                                           ('conserved energy per particle', func)])

    # Production run
    sim.run(nsteps)

    # Data to be returned
    data = {
        'unique_seed': unique_seed,
        'concentration': sim.system.concentration,
        'rmsd': sim.rmsd.item(),
        'steps': sim.current_step,
        'pressure': numpy.mean(sim.data['pressure']).item(),
        'temperature': numpy.mean(sim.data['temperature']).item(),
        'potential_energy': numpy.mean(sim.data['potential energy per particle']).item(),
        'conserved_energy': numpy.mean(sim.data['conserved energy per particle']).item(),
        'density': numpy.mean(sim.data['density']).item(),
        'output_path': output_path
    }
    data.update(report_simulation(sim))

    # Stats and logs
    if output_path is not None:
        import yaml
        db = {}
        db.update({'model': model})
        db.update(report_system(system))
        db.update(report_simulation(sim))
        db.update(report_trajectory(th))
        db.update(report_stats(sim.data))
        with open(os.path.join(output_path, 'production.yaml'), 'w') as fh:
            yaml.dump(db, fh, sort_keys=False)

    # Post processing
    path = None
    if output_path is not None:
        path = os.path.join(output_path, 'production.{symbol}.{tag}')
    if 'fskt' in postprocess:
        from atooms.postprocessing.helpers import logx_grid, filter_species
        tgrid = [0.0] + logx_grid(th.timestep, th.total_time * 0.5, 60)
        cf = pp.SelfIntermediateScattering(th, kgrid=[7.25], tgrid=tgrid)
        cf.do(path)
        data.update({**cf.results, **cf.analysis})
        cf = pp.Partial(pp.SelfIntermediateScattering, th, kgrid=[7.25], tgrid=tgrid)
        cf.do(path)
        data.update({**cf.results, **cf.analysis})

    if 'gr' in postprocess:
        cf = pp.Partial(pp.RadialDistributionFunction, th, norigins=-1, dr=0.01)
        cf.compute()
        data.update(cf.results)

    if 'vacf' in postprocess:
        tgrid = [0.0] + logx_grid(th.timestep, 10.0, 160)
        cf = pp.Partial(pp.VelocityAutocorrelation, th, tgrid=tgrid, norigins=-1)
        cf.compute()
        data.update(cf.results)

    if 'msd' in postprocess:
        tgrid = [0.0] + logx_grid(th.timestep, 10.0, 160)
        cf = pp.Partial(pp.MeanSquareDisplacement, th, tgrid=tgrid, norigins=-1)
        cf.compute()
        data.update(cf.results)

    # Store artifacts as the whole output_path
    if output_path:
        data['artifacts'] = output_path

    from atooms.core.utils import clear_logging
    clear_logging()
    return data