diff --git a/Makefile b/Makefile index a5117b454e..e7e528b053 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DEVTOOLS_DIR := devtools .PHONY: all help clean test test-unittests test-functional test-all \ install-all install-ci install-rmg install-rmgdb install-autotst install-gcn \ - install-gcn-cpu install-kinbot install-sella install-xtb install-torchani install-ob \ + install-gcn-cpu install-kinbot install-sella install-xtb install-torchani install-uma install-ob \ lite check-env compile @@ -36,6 +36,7 @@ help: @echo " install-sella Install Sella" @echo " install-xtb Install xTB" @echo " install-torchani Install TorchANI" + @echo " install-uma Install UMA (fairchem MLIP, gated model; users only, not CI)" @echo " install-ob Install OpenBabel" @echo "" @echo "Maintenance:" @@ -99,6 +100,11 @@ install-xtb: install-torchani: bash $(DEVTOOLS_DIR)/install_torchani.sh +# UMA (fairchem MLIP). Not part of install-ci: the model is gated (Meta license + HuggingFace +# token) and heavy, so this is a manual, user-driven setup. See devtools/install_uma.sh. +install-uma: + bash $(DEVTOOLS_DIR)/install_uma.sh + install-ob: bash $(DEVTOOLS_DIR)/install_ob.sh diff --git a/arc/job/adapters/ase_adapter.py b/arc/job/adapters/ase_adapter.py index 6f30983c40..ec7daa970e 100644 --- a/arc/job/adapters/ase_adapter.py +++ b/arc/job/adapters/ase_adapter.py @@ -12,7 +12,7 @@ from arc.job.adapters.common import _initialize_adapter from arc.job.factory import register_job_adapter from arc.imports import settings -from arc.settings.settings import ARC_PYTHON, find_executable +from arc.settings.settings import ARC_PYTHON, UMA_LATEST_MODEL, find_executable if TYPE_CHECKING: from arc.level import Level @@ -25,8 +25,12 @@ DEFAULT_ASE_ENV = { 'torchani': 'TANI_PYTHON', 'xtb': 'XTB_PYTHON', + 'uma': 'UMA_PYTHON', } +# Level methods that select the UMA calculator. 'uma' resolves to the latest model. 
+UMA_METHODS = ('uma', 'uma-s-1', 'uma-s-1p1') + class ASEAdapter(JobAdapter): """ A generic adapter for ASE (Atomic Simulation Environment) jobs. @@ -77,12 +81,13 @@ def __init__(self, self.job_adapter = 'ase' self.execution_type = execution_type or 'incore' self.incore_capacity = 100 - + self.sp = None self.opt_xyz = None self.freqs = None self.args = args or dict() + self.level = level # also set by _initialize_adapter; needed early by get_python_executable self.python_executable = self.get_python_executable() self.script_path = os.path.join(os.path.dirname(__file__), 'scripts', 'ase_script.py') @@ -128,11 +133,46 @@ def __init__(self, xyz=xyz, ) + def determine_calculator_name(self) -> str: + """ + Determine the ASE calculator name, from ``args['keyword']['calculator']`` if given, + otherwise inferred from the level method (e.g., a 'uma' method selects the UMA calculator). + + Returns: + str: The lowercased calculator name (empty string if undetermined). + """ + calc = (self.args or dict()).get('keyword', dict()).get('calculator', '') + if not calc and self.level is not None and getattr(self.level, 'method', None) \ + and self.level.method.lower() in UMA_METHODS: + calc = 'uma' + return calc.lower() + + def determine_settings(self) -> dict: + """ + Build the ``settings`` block passed to ase_script.py: the user's ``args['keyword']`` plus + a resolved ``calculator`` and, for UMA, default ``model`` (the level method, with 'uma' + resolving to the latest model), ``task``, and ``device``. + + Returns: + dict: The resolved ASE run settings. 
+ """ + settings_dict = dict((self.args or dict()).get('keyword', dict())) + calc = self.determine_calculator_name() + if calc: + settings_dict.setdefault('calculator', calc) + if calc == 'uma': + if 'model' not in settings_dict: + method = self.level.method.lower() if self.level is not None and self.level.method else 'uma' + settings_dict['model'] = UMA_LATEST_MODEL if method == 'uma' else method + settings_dict.setdefault('task', 'omol') + settings_dict.setdefault('device', 'cpu') + return settings_dict + def get_python_executable(self) -> str: """ Identify the correct Python executable based on the calculator. """ - calc = self.args.get('keyword', {}).get('calculator', '').lower() + calc = self.determine_calculator_name() env_mapping = settings.get('ASE_CALCULATORS_ENV', DEFAULT_ASE_ENV) env_var_name = env_mapping.get(calc) @@ -157,15 +197,41 @@ def write_input_file(self) -> None: 'xyz': self.xyz, 'charge': self.charge, 'multiplicity': self.multiplicity, + 'is_ts': self.species[0].is_ts if self.species else False, 'constraints': self.constraints, - 'settings': self.args.get('keyword', {}), + 'irc_direction': self.irc_direction, + 'settings': self.determine_settings(), } save_yaml_file(os.path.join(self.local_path, 'input.yml'), input_dict) + def warn_if_unreliable_uma_sp(self) -> bool: + """ + Warn if this is a UMA single point on a species whose absolute UMA energy is unreliable + (an isolated atom or triplet O2). UMA's geometries/frequencies are fine; only the absolute + energy of these under-represented species is off, so a DFT single point is preferable. + + Reference: This is a known issue for general machine learning interatomic potentials (MLIPs) + such as UMA (https://arxiv.org/abs/2405.20235), where atomic energy offsets do not accurately + model isolated non-bonded atoms or highly specific spin states like triplet O2. 
+ """ + if self.job_type not in ['sp', 'conf_sp'] or self.determine_calculator_name() != 'uma': + return False + symbols = self.xyz['symbols'] if self.xyz is not None else tuple() + is_atom = len(symbols) == 1 + is_triplet_o2 = len(symbols) == 2 and all(s == 'O' for s in symbols) and self.multiplicity == 3 + if is_atom or is_triplet_o2: + label = self.species[0].label if self.species else 'species' + logger.warning(f'Computing a UMA single point for {label} (an isolated atom or triplet O2). ' + f'UMA absolute energies are unreliable for these under-represented species; ' + f'consider using a DFT single point instead.') + return True + return False + def execute_incore(self) -> None: """ Execute the job incore. """ + self.warn_if_unreliable_uma_sp() self.write_input_file() cmd = [self.python_executable, self.script_path, '--yml_path', self.local_path] process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) diff --git a/arc/job/adapters/scripts/ase_script.py b/arc/job/adapters/scripts/ase_script.py index 07fa99faa4..d45af97b94 100644 --- a/arc/job/adapters/scripts/ase_script.py +++ b/arc/job/adapters/scripts/ase_script.py @@ -92,7 +92,18 @@ def get_calculator(calc_config: dict, charge: int = 0, multiplicity: int = 1): if multiplicity > 1: raise ValueError("ARC's integration with MOPAC vua the ASE calculator does not support multiplicity > 1.") return MOPAC(**kwargs) - + + elif name in ('uma', 'fairchem'): + # UMA (Meta FAIR fairchem-core). Total charge and spin (= multiplicity) are conditioned on + # the ase.Atoms via atoms.info in main(); they are not calculator kwargs. 
+ from fairchem.core import FAIRChemCalculator, pretrained_mlip + model = calc_config.get('model', 'uma-s-1p1') + device = calc_config.get('device', 'cpu') + task = calc_config.get('task', 'omol') + predictor = pretrained_mlip.get_predict_unit(model, device=device) + return FAIRChemCalculator(predictor, task_name=task) + + from ase.calculators.calculator import get_calculator_class try: calc_class = get_calculator_class(name) @@ -313,8 +324,10 @@ def main(): settings = input_dict.get('settings', {}) charge = input_dict.get('charge', 0) multiplicity = input_dict.get('multiplicity', 1) - + is_ts = input_dict.get('is_ts', False) + atoms = Atoms(symbols=xyz['symbols'], positions=xyz['coords']) + atoms.info.update({'charge': charge, 'spin': multiplicity}) # UMA (omol) conditions on these calc = get_calculator(settings, charge, multiplicity) atoms.calc = calc @@ -342,13 +355,16 @@ def save_current_geometry(out_dict, atoms_obj, input_xyz): 'scipyfminbfgs': SciPyFminBFGS, 'scipyfmincg': SciPyFminCG, 'sella': None, } - if engine_name == 'sella': + logfile = os.path.join(os.path.dirname(input_path), 'opt.log') + if is_ts or engine_name == 'sella': + # A TS search needs a saddle-point optimizer; UMA ships none, so use Sella. 
from sella import Sella opt_class = Sella + opt = opt_class(atoms, order=1 if is_ts else 0, logfile=logfile) else: opt_class = engine_dict.get(engine_name, BFGS) - opt = opt_class(atoms, logfile=os.path.join(os.path.dirname(input_path), 'opt.log')) - + opt = opt_class(atoms, logfile=logfile) + try: opt.run(fmax=fmax, steps=steps) save_current_geometry(output, atoms, xyz) @@ -360,6 +376,26 @@ def save_current_geometry(out_dict, atoms_obj, input_xyz): # For non-optimization jobs, still save the geometry save_current_geometry(output, atoms, xyz) + if job_type == 'irc': + from sella import IRC + from ase.io import read + fmax = float(settings.get('fmax', 0.001)) + steps = int(settings.get('steps', 1000)) + direction = input_dict.get('irc_direction', 'forward') + traj_path = os.path.join(os.path.dirname(input_path), 'irc.traj') + try: + irc = IRC(atoms, logfile=os.path.join(os.path.dirname(input_path), 'irc.log'), + trajectory=traj_path) + irc.run(fmax=fmax, steps=steps, direction=direction) + images = read(traj_path, index=':') + output['irc_traj'] = [ + {'coords': tuple(map(tuple, image.get_positions().tolist())), + 'symbols': xyz['symbols'], + 'isotopes': xyz.get('isotopes') or tuple([None] * len(xyz['symbols']))} + for image in images] + except Exception as exc: + output['error'] = f"IRC failed: {exc}" + if job_type in ['freq', 'optfreq']: try: freq_results = run_vibrational_analysis(atoms, settings) diff --git a/arc/job/adapters/uma_test.py b/arc/job/adapters/uma_test.py new file mode 100644 index 0000000000..04be91cd43 --- /dev/null +++ b/arc/job/adapters/uma_test.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + +""" +Unit tests for using the UMA (Meta FAIR fairchem-core) calculator through ARC's ASE job adapter. + +The env-independent tests verify UMA routing, calculator/settings resolution, input writing, and +output parsing without the gated model. 
The model-dependent tests (skipped unless uma_env and the +model are available and UMA_RUN_MODEL is set) run the real uma-s-1p1 model end-to-end. +""" + +import os +import shutil +import sys +import unittest +import unittest.mock + +from arc.common import ARC_TESTING_PATH, almost_equal_coords, read_yaml_file, save_yaml_file +from arc.job.adapters.ase_adapter import ASEAdapter +from arc.level import Level +from arc.parser.parser import (parse_1d_scan_coords, parse_e_elect, parse_frequencies, + parse_geometry, parse_irc_traj) +from arc.settings.settings import UMA_LATEST_MODEL, UMA_PYTHON, supported_ess +from arc.species import ARCSpecies + + +requires_model = unittest.skipUnless( + UMA_PYTHON is not None and os.environ.get('UMA_RUN_MODEL'), + 'The uma_env environment / UMA model is unavailable, or UMA_RUN_MODEL is not set.', +) + + +class TestUMAViaASEWiring(unittest.TestCase): + """Env-independent tests for routing a 'uma' level to the ASE adapter.""" + + def test_supported_ess(self): + """Test that the ASE engine UMA runs through is supported.""" + self.assertIn('ase', supported_ess) + + def test_level_routes_to_ase(self): + """Test that a 'uma' level (and explicit checkpoints) resolves to the ASE software.""" + self.assertEqual(Level(method='uma').software, 'ase') + self.assertEqual(Level(method='uma-s-1').software, 'ase') + self.assertEqual(Level(method='uma-s-1p1').software, 'ase') + + +class TestUMAViaASEAdapter(unittest.TestCase): + """Env-independent tests for the ASEAdapter configured for UMA.""" + + @classmethod + def setUpClass(cls): + """A method that is run before all unit tests in this class.""" + cls.maxDiff = None + cls.base = os.path.join(ARC_TESTING_PATH, 'test_UMA_via_ASE') + os.makedirs(cls.base, exist_ok=True) + # UMA selected implicitly via the level method. 
+ cls.job_method = ASEAdapter(execution_type='incore', job_type='sp', project='p', + project_directory=os.path.join(cls.base, 'method'), + level=Level(method='uma'), + species=[ARCSpecies(label='EtOH', smiles='CCO')], testing=True) + # UMA selected explicitly via args, with an explicit checkpoint. + cls.job_args = ASEAdapter(execution_type='incore', job_type='sp', project='p', + project_directory=os.path.join(cls.base, 'args'), + args={'keyword': {'calculator': 'uma', 'model': 'uma-s-1'}}, + species=[ARCSpecies(label='EtOH', smiles='CCO')], testing=True) + for job in (cls.job_method, cls.job_args): + os.makedirs(job.local_path, exist_ok=True) + + @classmethod + def tearDownClass(cls): + """A method that is run after all unit tests in this class.""" + shutil.rmtree(cls.base, ignore_errors=True) + + def test_determine_calculator_name(self): + """Test that the UMA calculator is detected from the level method or from args.""" + self.assertEqual(self.job_method.determine_calculator_name(), 'uma') + self.assertEqual(self.job_args.determine_calculator_name(), 'uma') + + def test_determine_settings_defaults(self): + """Test that UMA settings get sensible defaults (latest model, omol task, cpu).""" + settings = self.job_method.determine_settings() + self.assertEqual(settings['calculator'], 'uma') + self.assertEqual(settings['model'], UMA_LATEST_MODEL) + self.assertEqual(settings['task'], 'omol') + self.assertEqual(settings['device'], 'cpu') + + def test_determine_settings_explicit_model(self): + """Test that an explicit checkpoint in args is preserved.""" + self.assertEqual(self.job_args.determine_settings()['model'], 'uma-s-1') + + def test_get_python_executable(self): + """Test resolving the UMA python environment from settings.""" + ase_module = sys.modules[ASEAdapter.__module__] + with unittest.mock.patch.object(ase_module, 'settings', {'ASE_CALCULATORS_ENV': {'uma': 'UMA_PYTHON'}, + 'UMA_PYTHON': '/path/to/uma_python'}): + 
self.assertEqual(self.job_method.get_python_executable(), '/path/to/uma_python') + + def test_write_input_file(self): + """Test the input.yml carries charge/multiplicity/is_ts and resolved UMA settings.""" + self.job_method.write_input_file() + data = read_yaml_file(os.path.join(self.job_method.local_path, 'input.yml')) + self.assertEqual(data['job_type'], 'sp') + self.assertEqual(data['charge'], 0) + self.assertEqual(data['multiplicity'], 1) + self.assertFalse(data['is_ts']) + self.assertEqual(data['settings']['calculator'], 'uma') + self.assertEqual(data['settings']['model'], UMA_LATEST_MODEL) + self.assertEqual(data['settings']['task'], 'omol') + + def test_write_input_file_ts(self): + """Test that a TS species writes is_ts=True.""" + ts = ASEAdapter(execution_type='incore', job_type='opt', project='p', + project_directory=os.path.join(self.base, 'ts'), + level=Level(method='uma'), + species=[ARCSpecies(label='TS', is_ts=True, + xyz='O 0 0 0\nH 0 0 0.97\nH 0 0.94 -0.25')], testing=True) + os.makedirs(ts.local_path, exist_ok=True) + ts.write_input_file() + data = read_yaml_file(os.path.join(ts.local_path, 'input.yml')) + self.assertTrue(data['is_ts']) + + def test_warn_if_unreliable_uma_sp(self): + """Test the warning fires for a UMA single point on triplet O2 / an isolated atom.""" + o2 = ASEAdapter(execution_type='incore', job_type='sp', project='p', + project_directory=os.path.join(self.base, 'o2'), + level=Level(method='uma'), + species=[ARCSpecies(label='O2', xyz='O 0 0 0\nO 0 0 1.2', multiplicity=3)], testing=True) + + self.assertTrue(o2.warn_if_unreliable_uma_sp()) + self.assertFalse(self.job_method.warn_if_unreliable_uma_sp()) + + def test_output_yml_round_trip(self): + """Test a UMA/ASE output.yml is read back by ARC's YAML parser (incl. 
IRC/scan keys).""" + out_dir = os.path.join(self.base, 'roundtrip') + os.makedirs(out_dir, exist_ok=True) + out_path = os.path.join(out_dir, 'output.yml') + opt_xyz = {'symbols': ('O', 'H', 'H'), 'isotopes': (16, 1, 1), + 'coords': ((0.0, 0.0, 0.119), (0.0, 0.763, -0.477), (0.0, -0.763, -0.477))} + save_yaml_file(out_path, {'sp': -200123.45, 'opt_xyz': opt_xyz, + 'freqs': [1600.0, 3700.0, 3800.0], + 'irc_traj': [opt_xyz, opt_xyz], 'scan_coords': [opt_xyz]}) + self.assertAlmostEqual(parse_e_elect(out_path), -200123.45, places=2) + self.assertTrue(almost_equal_coords(parse_geometry(out_path), opt_xyz)) + self.assertEqual(len(parse_frequencies(out_path)), 3) + self.assertEqual(len(parse_irc_traj(out_path)), 2) + self.assertEqual(len(parse_1d_scan_coords(out_path)), 1) + + +@requires_model +class TestUMAViaASEWithModel(unittest.TestCase): + """Model-dependent tests; run the real uma-s-1p1 model via the ASE adapter.""" + + @classmethod + def setUpClass(cls): + """A method that is run before all unit tests in this class.""" + cls.base = os.path.join(ARC_TESTING_PATH, 'test_UMA_via_ASE_model') + + @classmethod + def tearDownClass(cls): + """A method that is run after all unit tests in this class.""" + shutil.rmtree(cls.base, ignore_errors=True) + + def _job(self, label, job_type, species, **kwargs): + """Build an incore UMA-via-ASE job.""" + return ASEAdapter(execution_type='incore', job_type=job_type, project='uma', + project_directory=os.path.join(self.base, f'{label}_{job_type}'), + level=Level(method='uma'), species=species, testing=True, **kwargs) + + def test_sp(self): + """Test a UMA single point returns a sane electronic energy (kJ/mol).""" + job = self._job('EtOH', 'sp', [ARCSpecies(label='EtOH', smiles='CCO')]) + job.execute_incore() + results = read_yaml_file(os.path.join(job.local_path, 'output.yml')) + self.assertIsInstance(results.get('sp'), float) + + def test_opt_freq(self): + """Test a UMA opt+freq returns a geometry and 3N-6 real frequencies.""" + spc = 
ARCSpecies(label='EtOH', smiles='CCO') + job = self._job('EtOH', 'optfreq', [spc]) + job.execute_incore() + results = read_yaml_file(os.path.join(job.local_path, 'output.yml')) + self.assertIn('opt_xyz', results) + self.assertEqual(len(results['freqs']), 3 * len(spc.get_xyz()['symbols']) - 6) + self.assertTrue(all(f > 0 for f in results['freqs'])) + + def test_ts_optfreq(self): + """Test a UMA TS opt+freq yields exactly one imaginary frequency.""" + ts_xyz = """N 0.0000000 0.0000000 0.3146069 +H -0.4668973 0.8086246 -0.0524357 +H -0.4668973 -0.8086246 -0.0524357 +H 0.9337946 0.0000000 -0.0524357""" + ts = ARCSpecies(label='TS', is_ts=True, xyz=ts_xyz, multiplicity=1) + job = self._job('TS', 'optfreq', [ts]) + job.execute_incore() + results = read_yaml_file(os.path.join(job.local_path, 'output.yml')) + self.assertEqual(sum(1 for f in results['freqs'] if f < 0), 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/arc/level.py b/arc/level.py index d5dfe28443..83acc02112 100644 --- a/arc/level.py +++ b/arc/level.py @@ -372,6 +372,10 @@ def deduce_software(self, if self.software is not None and job_type is None: return + # UMA (run via the ASE adapter; 'uma' resolves to the latest model) + if self.method in ('uma', 'uma-s-1', 'uma-s-1p1'): + self.software = 'ase' + # OneDMin if job_type == 'onedmin': if 'onedmin' not in supported_ess: diff --git a/arc/parser/adapters/yaml.py b/arc/parser/adapters/yaml.py index 0d63c17510..80d8599acc 100644 --- a/arc/parser/adapters/yaml.py +++ b/arc/parser/adapters/yaml.py @@ -118,8 +118,15 @@ def parse_1d_scan_energies(self) -> tuple[list[float] | None, list[float] | None return None, None def parse_1d_scan_coords(self) -> list[dict[str, tuple]] | None: - """Parse 1D scan coordinates from YAML data.""" - # Not implemented. + """ + Parse 1D scan coordinates from YAML data. + + Returns: Optional[List[Dict[str, tuple]]] + The Cartesian coordinates (xyz dicts) for each scan point. 
+ """ + scan_coords = self.data.get('scan_coords') + if scan_coords: + return [xyz if isinstance(xyz, dict) else str_to_xyz(xyz) for xyz in scan_coords] return None def parse_scan_conformers(self) -> 'pd.DataFrame' | None: @@ -139,7 +146,9 @@ def parse_irc_traj(self) -> list[dict[str, tuple]] | None: Returns: list[dict[str, tuple]] The Cartesian coordinates for each scan point. """ - # Not implemented. + irc_traj = self.data.get('irc_traj') + if irc_traj: + return [xyz if isinstance(xyz, dict) else str_to_xyz(xyz) for xyz in irc_traj] return None def parse_nd_scan_energies(self) -> dict | None: diff --git a/arc/parser/parser_test.py b/arc/parser/parser_test.py index e10496d842..1614e0a72a 100644 --- a/arc/parser/parser_test.py +++ b/arc/parser/parser_test.py @@ -1117,6 +1117,96 @@ def test_parse_ess_version(self): path5 = os.path.join(ARC_TESTING_PATH, 'freq', 'CH2O_freq_molpro.out') self.assertEqual(parser.parse_ess_version(path5), 'Molpro 2015.1.37') + def test_yaml_parser(self): + """Test the YAMLParser adapter for all its parse methods.""" + import tempfile + from arc.parser.adapters.yaml import YAMLParser + from arc.constants import E_h_kJmol, bohr_to_angstrom + import yaml + + yaml_data = { + 'opt_xyz': { + 'symbols': ('C', 'H', 'H', 'H', 'H'), + 'coords': ((0.0, 0.0, 0.0), (0.0, 0.0, 1.09), (1.03, 0.0, -0.36), (-0.51, 0.89, -0.36), (-0.51, -0.89, -0.36)) + }, + 'freqs': [1000.0, 1500.0, 3000.0], + 'normal_modes': [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + 'T1': 0.012, + 'sp': -100.5, + 'zpe': 50.2, + 'energies': [-40.0, -39.9], + 'angles': [0.0, 180.0], + 'scan_coords': [ + {'symbols': ('H', 'H'), 'coords': ((0.0, 0.0, 0.0), (0.0, 0.0, 0.74))}, + "H 0.0 0.0 0.0\nH 0.0 0.0 0.8" + ], + 'irc_traj': [ + {'symbols': ('H', 'H'), 'coords': ((0.0, 0.0, 0.0), (0.0, 0.0, 0.75))}, + "H 0.0 0.0 0.0\nH 0.0 0.0 0.85" + ], + 'dipole': [1.0, 2.0, 3.0], + 'polarizability': 1.5 + } + + with tempfile.NamedTemporaryFile(suffix='.yml', mode='w', delete=False) as f: + 
yaml.dump(yaml_data, f) + temp_path = f.name + + try: + adapter = YAMLParser(log_file_path=temp_path) + self.assertIsNone(adapter.logfile_contains_errors()) + + # Test parse_geometry + geom = adapter.parse_geometry() + self.assertEqual(geom['symbols'], ('C', 'H', 'H', 'H', 'H')) + self.assertEqual(geom['coords'][0], (0.0, 0.0, 0.0)) + + # Test parse_frequencies + freqs = adapter.parse_frequencies() + np.testing.assert_array_almost_equal(freqs, np.array([1000.0, 1500.0, 3000.0])) + + # Test parse_normal_mode_displacement + modes_f, modes_d = adapter.parse_normal_mode_displacement() + np.testing.assert_array_almost_equal(modes_f, np.array([1000.0, 1500.0, 3000.0])) + np.testing.assert_array_almost_equal(modes_d, np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])) + + # Test parse_t1 + self.assertEqual(adapter.parse_t1(), 0.012) + + # Test parse_e_elect + self.assertEqual(adapter.parse_e_elect(), -100.5) + + # Test parse_zpe_correction + self.assertEqual(adapter.parse_zpe_correction(), 50.2) + + # Test parse_1d_scan_energies + e_scan, a_scan = adapter.parse_1d_scan_energies() + self.assertEqual(a_scan, [0.0, 180.0]) + self.assertAlmostEqual(e_scan[0], 0.0) + self.assertAlmostEqual(e_scan[1], 0.1 * E_h_kJmol) + + # Test parse_1d_scan_coords + coords = adapter.parse_1d_scan_coords() + self.assertEqual(len(coords), 2) + self.assertEqual(coords[0]['symbols'], ('H', 'H')) + self.assertEqual(coords[1]['symbols'], ('H', 'H')) + + # Test parse_irc_traj + traj = adapter.parse_irc_traj() + self.assertEqual(len(traj), 2) + self.assertEqual(traj[0]['symbols'], ('H', 'H')) + self.assertEqual(traj[1]['symbols'], ('H', 'H')) + + # Test parse_dipole_moment + self.assertAlmostEqual(adapter.parse_dipole_moment(), np.linalg.norm([1.0, 2.0, 3.0])) + + # Test parse_polarizability + self.assertAlmostEqual(adapter.parse_polarizability(), 1.5 * (bohr_to_angstrom ** 3)) + + finally: + if os.path.exists(temp_path): + os.remove(temp_path) + if __name__ == '__main__': 
unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) diff --git a/arc/settings/settings.py b/arc/settings/settings.py index dd463ae67a..e39ebe2d9e 100644 --- a/arc/settings/settings.py +++ b/arc/settings/settings.py @@ -268,8 +268,13 @@ ASE_CALCULATORS_ENV = {'torchani': 'TANI_PYTHON', 'xtb': 'SELLA_PYTHON', + 'uma': 'UMA_PYTHON', } +# UMA (Universal Models for Atoms, Meta FAIR fairchem-core). The 'uma' calculator/level resolves +# to the latest model implemented in ARC; older checkpoints (e.g. 'uma-s-1') are named explicitly. +UMA_LATEST_MODEL = 'uma-s-1p1' + valid_chars = "-_[]=.,%s%s" % (string.ascii_letters, string.digits) # A scan with better resolution (lower number here) takes more time to compute, @@ -337,8 +342,8 @@ ARC_FAMILIES_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'data', 'families') # Default environment names for sister repos -TS_GCN_PYTHON, TANI_PYTHON, AUTOTST_PYTHON, ARC_PYTHON, XTB, XTB_PYTHON, OB_PYTHON, RMG_PYTHON, RMG_PATH, RMG_DB_PATH = \ - None, None, None, None, None, None, None, None, None, None +TS_GCN_PYTHON, TANI_PYTHON, UMA_PYTHON, AUTOTST_PYTHON, ARC_PYTHON, XTB, XTB_PYTHON, OB_PYTHON, RMG_PYTHON, RMG_PATH, RMG_DB_PATH = \ + None, None, None, None, None, None, None, None, None, None, None home = os.getenv("HOME") or os.path.expanduser("~") @@ -375,6 +380,7 @@ def find_executable(env_name, executable_name='python'): return None TANI_PYTHON = find_executable('tani_env') +UMA_PYTHON = find_executable('uma_env') SELLA_PYTHON = find_executable('sella_env') OB_PYTHON = find_executable('ob_env') TS_GCN_PYTHON = find_executable('ts_gcn') diff --git a/devtools/install_uma.sh b/devtools/install_uma.sh new file mode 100755 index 0000000000..161167088f --- /dev/null +++ b/devtools/install_uma.sh @@ -0,0 +1,146 @@ +#!/usr/bin/env bash +# +# install_uma.sh - Set up the 'uma_env' environment for ARC's UMA engine (USERS, not CI). 
+# +# UMA (Universal Models for Atoms) is Meta FAIR's fairchem-core foundation MLIP. ARC runs it in +# a dedicated 'uma_env' conda environment (fairchem-core + sella + ase), shelling out to it from +# arc_env via arc/job/env_run.py. This script wraps every step needed to get UMA working: +# +# 1. Create the 'uma_env' conda env from devtools/uma_environment.yml. +# 2. Verify the fairchem / Sella (incl. IRC) / ASE imports the UMA adapter relies on. +# 3. Authenticate to HuggingFace for the GATED uma-s-1p1 model (one-time, interactive). +# 4. Print (and, with --test, use) the environment exports needed to run UMA from arc_env. +# +# This script is intentionally NOT part of devtools/install_all.sh / `make install-ci`: the UMA +# model is gated behind a Meta license + HuggingFace token and is heavy to download, so it is a +# manual, user-driven setup rather than a CI dependency. +# +# Prerequisite (do this once, in a browser logged into HuggingFace): +# Accept the model license at https://huggingface.co/facebook/UMA +# and create an access token with "read access to gated repos". +# +# Usage: +# bash devtools/install_uma.sh # install + verify + HuggingFace login (defaults to CPU) +# bash devtools/install_uma.sh --cpu # install UMA optimized for CPU only machines (default) +# bash devtools/install_uma.sh --gpu # install UMA with GPU (CUDA) support +# bash devtools/install_uma.sh --test # also run the UMA model-dependent unit tests +# bash devtools/install_uma.sh --skip-hf-login # skip the HuggingFace login step (CI/non-interactive) +# +# Re-running is safe: an existing 'uma_env' is updated in place. 
+ +set -eo pipefail + +RUN_TESTS=0 +SKIP_HF_LOGIN=0 +DEVICE="cpu" +for arg in "$@"; do + case "$arg" in + --test) RUN_TESTS=1 ;; + --skip-hf-login) SKIP_HF_LOGIN=1 ;; + --cpu) DEVICE="cpu" ;; + --gpu) DEVICE="gpu" ;; + -h|--help) sed -n '2,30p' "$0"; exit 0 ;; + *) echo "Unknown argument: $arg" >&2; exit 1 ;; + esac +done + +# Resolve repo paths from this script's location (no hard-coded paths). +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ARC_DIR="$(dirname "$SCRIPT_DIR")" +ENV_YAML="$SCRIPT_DIR/uma_environment.yml" +ENV_NAME="$(grep -E '^ *name:' "$ENV_YAML" | head -1 | awk '{print $2}')" + +# 1) Pick a conda front-end and initialize shell integration. +if command -v micromamba &>/dev/null; then + COMMAND_PKG=micromamba + eval "$(micromamba shell hook --shell=bash)" +elif command -v mamba &>/dev/null; then + COMMAND_PKG=mamba + BASE=$(conda info --base); source "$BASE/etc/profile.d/conda.sh" +elif command -v conda &>/dev/null; then + COMMAND_PKG=conda + BASE=$(conda info --base); source "$BASE/etc/profile.d/conda.sh" +else + echo "❌ No micromamba/mamba/conda found in PATH." >&2 + exit 1 +fi +echo "✔️ Using $COMMAND_PKG" + +# 2) Create or update the environment. 
+if $COMMAND_PKG env list | grep -qE "^\s*${ENV_NAME}\s"; then + echo ">>> Updating existing '$ENV_NAME' from $ENV_YAML" + $COMMAND_PKG env update -n "$ENV_NAME" -f "$ENV_YAML" --prune +else + echo ">>> Creating '$ENV_NAME' from $ENV_YAML" + $COMMAND_PKG env create -n "$ENV_NAME" -f "$ENV_YAML" -y +fi + +# Install PyTorch and UMA dependencies +if [ "$DEVICE" = "cpu" ]; then + echo ">>> Installing CPU-only PyTorch" + $COMMAND_PKG run -n "$ENV_NAME" pip install torch --index-url https://download.pytorch.org/whl/cpu +else + echo ">>> Installing CUDA/GPU PyTorch" + $COMMAND_PKG run -n "$ENV_NAME" pip install torch +fi + +echo ">>> Installing fairchem-core, sella, and ase" +$COMMAND_PKG run -n "$ENV_NAME" pip install fairchem-core sella ase + +# 3) Verify the imports that ARC's ASE job script (ase_script.py) depends on when running UMA. +echo ">>> Verifying fairchem / Sella / ASE imports in '$ENV_NAME'" +$COMMAND_PKG run -n "$ENV_NAME" python - <<'PYCODE' +from fairchem.core import FAIRChemCalculator, pretrained_mlip # noqa: F401 +from sella import Sella, IRC # noqa: F401 +import ase +print("fairchem + Sella (incl. IRC) + ASE", ase.__version__, "imports OK") +PYCODE + +# 4) HuggingFace authentication for the gated uma-s-1p1 model. +if [ "$SKIP_HF_LOGIN" -eq 0 ]; then + if [ -n "$HF_TOKEN" ]; then + echo ">>> Using HF_TOKEN from environment for HuggingFace authentication." + $COMMAND_PKG run -n "$ENV_NAME" huggingface-cli login --token "$HF_TOKEN" + elif $COMMAND_PKG run -n "$ENV_NAME" huggingface-cli whoami &>/dev/null; then + echo "✔️ Already authenticated to HuggingFace." + else + echo ">>> HuggingFace login is required for the gated model 'facebook/UMA'." + echo " If you have not yet accepted the license, open https://huggingface.co/facebook/UMA first." + $COMMAND_PKG run -n "$ENV_NAME" huggingface-cli login + fi +fi + +# 5) Runtime environment for invoking UMA from arc_env. 
+# These exports let arc_env's Python load OpenBabel correctly when invoked non-interactively +# (calling the env's python directly, rather than via an activated shell). PYTHONPATH points at +# the ARC checkout. Computed dynamically so there are no hard-coded paths. +ARC_ENV_PY="$($COMMAND_PKG run -n arc_env python -c 'import sys; print(sys.executable)')" +ARC_ENV_PREFIX="$(dirname "$(dirname "$ARC_ENV_PY")")" +BABEL_VERSION_DIR="$(ls -d "$ARC_ENV_PREFIX"/lib/openbabel/*/ 2>/dev/null | head -1)" + +export_block() { + echo "export BABEL_LIBDIR=${BABEL_VERSION_DIR%/}" + echo "export BABEL_DATADIR=${ARC_ENV_PREFIX}/share/openbabel/$(basename "${BABEL_VERSION_DIR%/}")" + echo "export PYTHONPATH=${ARC_DIR}:\$PYTHONPATH" +} + +echo "" +echo "✅ '$ENV_NAME' is ready. ARC discovers it via find_executable('$ENV_NAME')." +echo "" +echo "To run a UMA job, activate arc_env and set 'method' to 'uma' (resolves to the latest model)." +echo "To run the UMA model-dependent unit tests, export the following and run pytest with UMA_RUN_MODEL=1:" +echo "----------------------------------------------------------------------" +export_block +echo "UMA_RUN_MODEL=1 ${ARC_ENV_PY} -m pytest arc/job/adapters/uma_test.py -v" +echo "----------------------------------------------------------------------" + +# 6) Optionally run the model-dependent tests now, using the exports above. +if [ "$RUN_TESTS" -eq 1 ]; then + echo ">>> Running the UMA model-dependent unit tests (first run downloads the model; this is slow)..." + export BABEL_LIBDIR="${BABEL_VERSION_DIR%/}" + export BABEL_DATADIR="${ARC_ENV_PREFIX}/share/openbabel/$(basename "${BABEL_VERSION_DIR%/}")" + export PYTHONPATH="${ARC_DIR}:${PYTHONPATH}" + UMA_RUN_MODEL=1 "$ARC_ENV_PY" -m pytest "$ARC_DIR/arc/job/adapters/uma_test.py" -v +fi + +echo "✅ UMA setup script finished." 
diff --git a/devtools/uma_environment.yml b/devtools/uma_environment.yml new file mode 100644 index 0000000000..2a97fbae25 --- /dev/null +++ b/devtools/uma_environment.yml @@ -0,0 +1,11 @@ +name: uma_env +channels: + - conda-forge +dependencies: + - python =3.12 + - numpy + - pandas + - pyyaml + - setuptools + - pip + - huggingface_hub diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 7e7b318cce..e0898cce69 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -105,6 +105,28 @@ Install dependencies - Test ARC by typing ``make test`` under the ARC folder after activating the anaconda `arc_env` environment. +Install the UMA engine (optional) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +ARC can use `UMA <https://huggingface.co/facebook/UMA>`_, Meta FAIR's ``fairchem-core`` +foundation machine-learned interatomic potential, as a fast local engine for geometry +optimization, frequencies, single points, hindered-rotor scans, IRCs, and transition-state +searches. Use ``method='uma'`` in a level of theory to select it (it resolves to the latest UMA +model implemented in ARC). + +UMA runs in its own ``uma_env`` conda environment and is **not** installed by ``make install-all`` +or in CI, because the model is gated behind a Meta license and a HuggingFace token and is heavy to +download. To set it up, run:: + + make install-uma # or: bash devtools/install_uma.sh + +This creates ``uma_env`` (``fairchem-core`` + ``sella`` + ``ase``), verifies the required imports, +and walks you through the one-time HuggingFace login for the gated model. Before running it, accept +the model license at https://huggingface.co/facebook/UMA (in a browser logged into HuggingFace) and +create a token with read access to gated repositories. To also run the UMA model-dependent unit +tests after installing, use ``bash devtools/install_uma.sh --test``. + + Create a ``.arc`` folder (optional but recommended) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^