diff --git a/arc/main.py b/arc/main.py index ca18bbbf3a..d363556fdf 100644 --- a/arc/main.py +++ b/arc/main.py @@ -98,20 +98,22 @@ class ARC(object): ess_settings (dict, optional): A dictionary of available ESS (keys) and a corresponding server list (values). bath_gas (str, optional): A bath gas. Currently used in OneDMin to calc L-J parameters. Allowed values are He, Ne, Ar, Kr, H2, N2, O2. - adaptive_levels (dict, optional): A dictionary of levels of theory for ranges of the number of heavy atoms in - the molecule. Keys are tuples of (min_num_atoms, max_num_atoms), values are dictionaries. Keys of the - sub-dictionaries are tuples of job types, values are levels of theory (str, dict or Level). + adaptive_levels (list, optional): A list of levels of theory for ranges of the number of heavy atoms in the + molecule. Each entry is a dictionary with an ``atom_range`` 2-list (min_num_atoms, max_num_atoms; the upper + bound may be ``'inf'``) and a ``levels`` mapping from job type to level of theory (str, dict or Level). + Job types sharing a level may be given as a whitespace- or comma-separated key (e.g. ``'opt freq'``). Job types not defined in adaptive levels will have non-adaptive (regular) levels. Example:: - adaptive_levels = {(1, 5): {('opt', 'freq'): 'wb97xd/6-311+g(2d,2p)', - 'sp': 'ccsd(t)-f12/aug-cc-pvtz-f12'}, - (6, 15): {('opt', 'freq'): 'b3lyp/cbsb7', - 'sp': 'dlpno-ccsd(t)/def2-tzvp'}, - (16, 30): {('opt', 'freq'): 'b3lyp/6-31g(d,p)', - 'sp': 'wb97xd/6-311+g(2d,2p)'}, - (31, 'inf'): {('opt', 'freq'): 'b3lyp/6-31g(d,p)', - 'sp': 'b3lyp/6-311+g(d,p)'}} + adaptive_levels = [{'atom_range': [1, 5], + 'levels': {'opt freq': 'wb97xd/6-311+g(2d,2p)', + 'sp': 'ccsd(t)-f12/aug-cc-pvtz-f12'}}, + {'atom_range': [6, 15], + 'levels': {'opt freq': 'b3lyp/cbsb7', + 'sp': 'dlpno-ccsd(t)/def2-tzvp'}}, + {'atom_range': [16, 'inf'], + 'levels': {'opt freq': 'b3lyp/6-31g(d,p)', + 'sp': 'wb97xd/6-311+g(2d,2p)'}}] freq_scale_factor (float, optional): The harmonic frequencies scaling factor. Could be automatically determined if not available in Arkane and not provided by the user. @@ -163,9 +165,8 @@ class ARC(object): ts_guess_level (Level): Level of theory for comparisons of TS guesses between different methods. irc_level (Level): The level of theory to use for IRC calculations. orbitals_level (Level): Level of theory for molecular orbitals calculations. - adaptive_levels (dict): A dictionary of levels of theory for ranges of the number of heavy atoms in - the molecule. Keys are tuples of (min_num_atoms, max_num_atoms), values are dictionaries. Keys of the - sub-dictionaries are tuples of job types, values are levels of theory (str, dict or Level). + adaptive_levels (dict): The processed adaptive levels, keyed by (min_num_atoms, max_num_atoms) tuples, each + mapping job-type tuples to ``Level`` objects (built from the user-facing ``adaptive_levels`` list). Job types not defined in adaptive levels will have non-adaptive (regular) levels. output (dict): Output dictionary with status and final QM file paths for all species. Only used for restarting, the actual object used is in the Scheduler class. @@ -431,8 +432,10 @@ def as_dict(self) -> dict: """ restart_dict = dict() if self.adaptive_levels is not None: - restart_dict['adaptive_levels'] = {atom_range: {job_type: level.as_dict() for job_type, level in levels_dict} - for atom_range, levels_dict in self.adaptive_levels.items()} + restart_dict['adaptive_levels'] = [ + {'atom_range': [atom_range[0], atom_range[1]], + 'levels': {' '.join(job_types): level.as_dict() for job_types, level in levels_dict.items()}} + for atom_range, levels_dict in self.adaptive_levels.items()] if self.allow_nonisomorphic_2d: restart_dict['allow_nonisomorphic_2d'] = self.allow_nonisomorphic_2d if self.arkane_level_of_theory is not None: @@ -1256,44 +1259,60 @@ def standardize_output_paths(self): self.output = dict() -def process_adaptive_levels(adaptive_levels: dict | None) -> dict | None: +def process_adaptive_levels(adaptive_levels: list | None) -> dict | None: """ Process the ``adaptive_levels`` argument. + The user-facing form is a YAML-friendly list of entries, each a dictionary with an + ``atom_range`` 2-list (the heavy-atom count range, the upper bound may be the string + ``'inf'`` or ``float('inf')``) and a ``levels`` mapping of job types to levels of theory. + Job types that share a level may be given as a single whitespace- or comma-separated key + (e.g. ``'opt freq'``). A level value may be a string or a ``Level`` dictionary. For example:: + + adaptive_levels = [{'atom_range': [1, 6], + 'levels': {'opt freq': 'wb97xd/def2tzvp', + 'sp': 'ccsd(t)-f12/cc-pvtz-f12'}}, + {'atom_range': [7, 'inf'], + 'levels': {'opt freq': 'b3lyp/6-31g(d,p)', + 'sp': 'dlpno-ccsd(t)/def2-tzvp'}}] + Args: - adaptive_levels (dict): The adaptive levels dictionary. + adaptive_levels (list): The adaptive levels specification (a list of entries). - Returns: dict - The processed adaptive levels dictionary. + Returns: dict | None + The processed adaptive levels keyed by ``(min_heavy_atoms, max_heavy_atoms)`` tuples, + each mapping job-type tuples to ``Level`` objects, or ``None`` if the input is ``None``. """ if adaptive_levels is None: return None - processed = dict() - if not isinstance(adaptive_levels, dict): - raise InputError(f'The adaptive levels argument must be a dictionary, ' + if not isinstance(adaptive_levels, list): + raise InputError(f'The adaptive levels argument must be a list of entries, ' f'got {adaptive_levels} which is a {type(adaptive_levels)}') - for atom_range, adaptive_level in adaptive_levels.items(): - if not isinstance(atom_range, tuple) \ - or not all([isinstance(a, int) or a == 'inf' for a in atom_range]) \ - or len(atom_range) != 2: - raise InputError(f'Keys of the adaptive levels argument must be 2-length tuples of integers or an "inf" ' - f'indicator, got {atom_range} which is a {type(atom_range)} in:\n{adaptive_levels}') - if not isinstance(adaptive_level, dict): - raise InputError(f'Each adaptive level in the adaptive levels argument must be a dictionary, ' - f'got {adaptive_level} which is a {type(adaptive_level)} in:\n{adaptive_levels}') + processed = dict() + for entry in adaptive_levels: + if not isinstance(entry, dict) or 'atom_range' not in entry or 'levels' not in entry: + raise InputError(f'Each adaptive levels entry must be a dictionary with "atom_range" and "levels" ' + f'keys, got {entry} which is a {type(entry)} in:\n{adaptive_levels}') + atom_range = entry['atom_range'] + if not isinstance(atom_range, (list, tuple)) or len(atom_range) != 2 \ + or not isinstance(atom_range[0], int) \ + or not (isinstance(atom_range[1], int) or atom_range[1] in ('inf', float('inf'))): + raise InputError(f'The "atom_range" of each adaptive levels entry must be a 2-length list of an integer ' + f'lower bound and an integer or "inf" upper bound, got {atom_range} ' + f'in:\n{adaptive_levels}') + atom_range = (atom_range[0], 'inf' if atom_range[1] in ('inf', float('inf')) else atom_range[1]) + levels = entry['levels'] + if not isinstance(levels, dict): + raise InputError(f'The "levels" of each adaptive levels entry must be a dictionary, ' + f'got {levels} which is a {type(levels)} in:\n{adaptive_levels}') processed[atom_range] = dict() - for sub_key, level in adaptive_level.items(): - new_sub_key = (sub_key,) if isinstance(sub_key, str) else sub_key - if not isinstance(new_sub_key, tuple): - raise InputError(f'Job types specifications in adaptive levels must be tuples, got {sub_key} ' - f'which is a {type(sub_key)} in:\n{adaptive_levels}') - new_level = Level(repr=level) - processed[atom_range][new_sub_key] = new_level - atom_ranges = sorted(list(adaptive_levels.keys()), key=lambda x: x[0]) + for job_types, level in levels.items(): + job_type_tuple = tuple(job_types.replace(',', ' ').split()) + processed[atom_range][job_type_tuple] = Level(repr=level) + atom_ranges = sorted(processed.keys(), key=lambda x: x[0]) for i, atom_range in enumerate(atom_ranges): if i and atom_ranges[i-1][1] + 1 != atom_ranges[i][0]: - raise InputError(f'Atom ranges of adaptive levels must be consecutive. ' - f'Got:\n{list(adaptive_levels.keys())}') + raise InputError(f'Atom ranges of adaptive levels must be consecutive. Got:\n{atom_ranges}') if atom_ranges[-1][1] != 'inf': raise InputError(f'The last atom range must be "inf", got {atom_ranges[-1][1]} in {atom_ranges}') return processed diff --git a/arc/main_test.py b/arc/main_test.py index 0c0e65ed1c..7dd986b163 100644 --- a/arc/main_test.py +++ b/arc/main_test.py @@ -388,46 +388,72 @@ def test_add_hydrogen_for_bde(self): self.assertIn('H', [spc.label for spc in arc1.species]) def test_process_adaptive_levels(self): - """Test processing the adaptive levels""" - adaptive_levels_1 = {(1, 5): {('opt', 'freq'): 'wb97xd/6-311+g(2d,2p)', - ('sp',): 'ccsd(t)-f12/aug-cc-pvtz-f12'}, - (6, 15): {('opt', 'freq'): 'b3lyp/cbsb7', - 'sp': 'dlpno-ccsd(t)/def2-tzvp'}, - (16, 30): {('opt', 'freq'): 'b3lyp/6-31g(d,p)', - 'sp': {'method': 'wb97xd', 'basis': '6-311+g(2d,2p)'}}, - (31, 'inf'): {('opt', 'freq'): 'b3lyp/6-31g(d,p)', - 'sp': 'b3lyp/6-311+g(d,p)'}} - + """Test processing the adaptive levels (YAML-friendly list-of-entries schema)""" + # None passes through. + self.assertIsNone(process_adaptive_levels(None)) + + # A normal, multi-range specification. Job types sharing a level are given as a + # whitespace- or comma-separated key; a level may be a string or a Level dict. + adaptive_levels_1 = [{'atom_range': [1, 5], + 'levels': {'opt freq': 'wb97xd/6-311+g(2d,2p)', + 'sp': 'ccsd(t)-f12/aug-cc-pvtz-f12'}}, + {'atom_range': [6, 15], + 'levels': {'opt, freq': 'b3lyp/cbsb7', + 'sp': 'dlpno-ccsd(t)/def2-tzvp'}}, + {'atom_range': [16, 30], + 'levels': {'opt freq': 'b3lyp/6-31g(d,p)', + 'sp': {'method': 'wb97xd', 'basis': '6-311+g(2d,2p)'}}}, + {'atom_range': [31, 'inf'], + 'levels': {'opt freq': 'b3lyp/6-31g(d,p)', + 'sp': 'b3lyp/6-311+g(d,p)'}}] processed_1 = process_adaptive_levels(adaptive_levels_1) self.assertEqual(processed_1[(6, 15)][('sp',)].simple(), 'dlpno-ccsd(t)/def2-tzvp') self.assertEqual(processed_1[(16, 30)][('sp',)].simple(), 'wb97xd/6-311+g(2d,2p)') - - # test non dict + self.assertEqual(processed_1[(1, 5)][('opt', 'freq')].simple(), 'wb97xd/6-311+g(2d,2p)') + + # A single range covering everything, and a float 'inf' is accepted as the upper bound. + processed_2 = process_adaptive_levels([{'atom_range': [1, float('inf')], + 'levels': {'opt freq': 'b3lyp/6-31g(d,p)', + 'sp': 'b3lyp/6-311+g(d,p)'}}]) + self.assertEqual(processed_2[(1, 'inf')][('sp',)].simple(), 'b3lyp/6-311+g(d,p)') + + # Restart round-trip: as_dict() must emit the list form and reproduce the same structure. + arc0 = ARC(project='adaptive_levels_test', adaptive_levels=adaptive_levels_1) + restart_levels = arc0.as_dict()['adaptive_levels'] + self.assertIsInstance(restart_levels, list) + reprocessed = process_adaptive_levels(restart_levels) + self.assertEqual(reprocessed[(6, 15)][('sp',)].simple(), 'dlpno-ccsd(t)/def2-tzvp') + self.assertEqual(set(reprocessed.keys()), set(processed_1.keys())) + + # Not a list (the legacy tuple-dict form is no longer accepted). with self.assertRaises(InputError): process_adaptive_levels(4) - # wrong atom range with self.assertRaises(InputError): - process_adaptive_levels({5: {('opt', 'freq'): 'wb97xd/6-311+g(2d,2p)', - ('sp',): 'ccsd(t)-f12/aug-cc-pvtz-f12'}, - (6, 'inf'): {('opt', 'freq'): 'b3lyp/6-31g(d,p)', - 'sp': 'b3lyp/6-311+g(d,p)'}}) - # no 'inf + process_adaptive_levels({(1, 5): {('opt', 'freq'): 'wb97xd/6-311+g(2d,2p)'}, + (6, 'inf'): {'sp': 'b3lyp/6-311+g(d,p)'}}) + # atom_range is not a 2-length list. + with self.assertRaises(InputError): + process_adaptive_levels([{'atom_range': [5], 'levels': {'sp': 'b3lyp/6-311+g(d,p)'}}]) + # 'inf' is only allowed as the upper bound. + with self.assertRaises(InputError): + process_adaptive_levels([{'atom_range': [float('inf'), 'inf'], 'levels': {'sp': 'b3lyp/6-311+g(d,p)'}}]) + with self.assertRaises(InputError): + process_adaptive_levels([{'atom_range': ['inf', 10], 'levels': {'sp': 'b3lyp/6-311+g(d,p)'}}]) + # The last range does not end with 'inf'. + with self.assertRaises(InputError): + process_adaptive_levels([{'atom_range': [1, 5], 'levels': {'sp': 'wb97xd/def2tzvp'}}, + {'atom_range': [6, 75], 'levels': {'sp': 'b3lyp/6-311+g(d,p)'}}]) + # 'levels' is not a dict. with self.assertRaises(InputError): - process_adaptive_levels({(1, 5): {('opt', 'freq'): 'wb97xd/6-311+g(2d,2p)', - ('sp',): 'ccsd(t)-f12/aug-cc-pvtz-f12'}, - (6, 75): {('opt', 'freq'): 'b3lyp/6-31g(d,p)', - 'sp': 'b3lyp/6-311+g(d,p)'}}) - # adaptive level not a dict + process_adaptive_levels([{'atom_range': [1, 5], 'levels': {'sp': 'wb97xd/def2tzvp'}}, + {'atom_range': [6, 'inf'], 'levels': 'b3lyp/6-31g(d,p)'}]) + # Non-consecutive atom ranges. with self.assertRaises(InputError): - process_adaptive_levels({(1, 5): {('opt', 'freq'): 'wb97xd/6-311+g(2d,2p)', - ('sp',): 'ccsd(t)-f12/aug-cc-pvtz-f12'}, - (6, 'inf'): 'b3lyp/6-31g(d,p)'}) - # non-consecutive atom ranges + process_adaptive_levels([{'atom_range': [1, 5], 'levels': {'sp': 'wb97xd/def2tzvp'}}, + {'atom_range': [15, 'inf'], 'levels': {'sp': 'b3lyp/6-311+g(d,p)'}}]) + # An entry missing required keys. with self.assertRaises(InputError): - process_adaptive_levels({(1, 5): {('opt', 'freq'): 'wb97xd/6-311+g(2d,2p)', - ('sp',): 'ccsd(t)-f12/aug-cc-pvtz-f12'}, - (15, 'inf'): {('opt', 'freq'): 'b3lyp/6-31g(d,p)', - 'sp': 'b3lyp/6-311+g(d,p)'}}) + process_adaptive_levels([{'levels': {'sp': 'wb97xd/def2tzvp'}}]) def test_process_level_of_theory(self): """ diff --git a/arc/scheduler.py b/arc/scheduler.py index 9a62477d86..0c32d9f6b9 100644 --- a/arc/scheduler.py +++ b/arc/scheduler.py @@ -52,6 +52,7 @@ from arc.level import Level from arc.species.species import (ARCSpecies, are_coords_compliant_with_graph, + check_label, determine_rotor_symmetry, TSGuess) from arc.species.converter import (check_isomorphism, @@ -333,6 +334,8 @@ def __init__(self, self.save_restart = False if len(self.rxn_list): + if self.adaptive_levels is not None: + self._apply_adaptive_reaction_levels() rxn_info_path = self.make_reaction_labels_info_file() for rxn in self.rxn_list: logger.info('\n\n') @@ -941,8 +944,10 @@ def run_job(self, torsions = species.rotors_dict[rotor_index]['torsion'] torsions = [torsions] if not isinstance(torsions[0], list) else torsions if self.adaptive_levels is not None and label is not None: + spc = self.species_dict[label] + heavy_atoms = spc.adaptive_lot_n_heavy if spc.adaptive_lot_n_heavy is not None else spc.number_of_heavy_atoms level_of_theory = self.determine_adaptive_level(original_level_of_theory=level_of_theory, job_type=job_type, - heavy_atoms=self.species_dict[label].number_of_heavy_atoms) + heavy_atoms=heavy_atoms) job_adapter = job_adapter.lower() if job_adapter is not None else \ self.deduce_job_adapter(level=Level(repr=level_of_theory), job_type=job_type) args = {'keyword': {}, 'block': {}} @@ -3957,6 +3962,78 @@ def determine_adaptive_level(self, # for any other job type use the original level of theory regardless of the number of heavy atoms return original_level_of_theory + def _adaptive_atom_range(self, heavy_atoms: int) -> tuple | None: + """ + Determine the adaptive levels atom range (the LOT grain) that a heavy-atom count falls into. + + Args: + heavy_atoms (int): The number of heavy atoms. + + Returns: + tuple | None: The ``(min, max)`` atom range key from ``self.adaptive_levels``, or ``None`` if not found. + """ + for atom_range in self.adaptive_levels.keys(): + if (atom_range[1] == 'inf' and heavy_atoms >= atom_range[0]) \ + or (atom_range[1] != 'inf' and atom_range[0] <= heavy_atoms <= atom_range[1]): + return atom_range + return None + + def _apply_adaptive_reaction_levels(self): + """ + Make every reaction's energetics internally consistent under adaptive levels of theory. + + Heavy atoms are conserved across a reaction, so the reaction-wide heavy-atom count (the sum over the reactants, + equal to the TS supermolecule count) keys a single adaptive level for the whole reaction. A reaction + participant whose own heavy-atom count lands it on a *different* (finer) adaptive grain than the reaction + would otherwise be evaluated at an inconsistent level, mixing levels of theory across the barrier. To avoid + this, each such participant whose ``thermo_at_own_level`` is ``False`` (the default) is itself evaluated at + the reaction-wide level (no copy). If ``thermo_at_own_level`` is ``True``, an autonomous relabeled copy of + the species is created from the outset and used by the reaction (evaluated at the reaction-wide level), + while the original species is left to compute its own thermochemistry at its own granular level. A copy is + also created for a no-copy participant that is shared across reactions landing on different grains, since a + single per-species override cannot keep both reactions internally consistent. + + This runs once during setup, before the reactions are processed, so each reaction is defined with its copies + from the start (its label, reactants, and products are kept mutually consistent for ``check_attributes`` and + restart). + """ + for rxn_i, rxn in enumerate(self.rxn_list): + rxn_index = rxn.index if rxn.index is not None else rxn_i + reactant_species = [self.species_dict[label] for label in rxn.reactants if label in self.species_dict] + if len(reactant_species) != len(rxn.reactants) \ + or any(spc.number_of_heavy_atoms is None for spc in reactant_species): + logger.warning(f'Could not determine reaction-wide adaptive levels for {rxn.label}, ' + f'using per-species levels.') + continue + reaction_n_heavy = sum(spc.number_of_heavy_atoms for spc in reactant_species) + reaction_range = self._adaptive_atom_range(reaction_n_heavy) + for participants in (rxn.reactants, rxn.products): + for pos, label in enumerate(participants): + spc = self.species_dict.get(label) + if spc is None or spc.number_of_heavy_atoms is None \ + or self._adaptive_atom_range(spc.number_of_heavy_atoms) == reaction_range: + continue + if not spc.thermo_at_own_level: + if spc.adaptive_lot_n_heavy is None \ + or self._adaptive_atom_range(spc.adaptive_lot_n_heavy) == reaction_range: + spc.adaptive_lot_n_heavy = reaction_n_heavy + continue + logger.warning(f'Species {label} participates in reactions on different adaptive level ' + f'grains, creating a dedicated copy of it for reaction {rxn.label} to keep ' + f'the reaction internally consistent.') + copy_label = check_label(f'{label}_TS{rxn_index}')[0] + if copy_label not in self.species_dict: + copy_spc = spc.copy() + copy_spc.label = copy_label + copy_spc.adaptive_lot_n_heavy = reaction_n_heavy + copy_spc.compute_thermo = False + copy_spc.include_in_thermo_lib = False + self.species_list.append(copy_spc) + self.species_dict[copy_label] = copy_spc + self.initialize_output_dict(copy_label) + participants[pos] = copy_label + rxn.label = rxn.arrow.join([rxn.plus.join(rxn.reactants), rxn.plus.join(rxn.products)]) + def initialize_output_dict(self, label: str | None = None): """ Initialize self.output. diff --git a/arc/scheduler_test.py b/arc/scheduler_test.py index fea28e86aa..469df61205 100644 --- a/arc/scheduler_test.py +++ b/arc/scheduler_test.py @@ -1273,5 +1273,167 @@ def test_spawn_ts_jobs_unknown_family_admission_predicate(self): self.assertEqual(admit_unknown_family, expected_admission) +class TestSchedulerAdaptiveReactionLevels(unittest.TestCase): + """ + Contains unit tests for the reaction-wide adaptive levels of theory logic + (Scheduler._apply_adaptive_reaction_levels and the spawn_job override). + """ + @classmethod + def setUpClass(cls): + """A method that is run before all unit tests in this class.""" + cls.ess_settings = {'gaussian': ['server1'], 'molpro': ['server2', 'server1'], 'qchem': ['server1']} + # A grain at exactly 1 heavy atom, and another for everything larger, so single-heavy-atom wells + # (e.g. CH4, OH) land on a different grain than a 2-heavy-atom reaction. + cls.adaptive_levels = {(1, 1): {('opt', 'freq'): Level(repr='wb97xd/def2tzvp'), + ('sp',): Level(repr='ccsd(t)-f12/cc-pvtz-f12')}, + (2, 'inf'): {('opt', 'freq'): Level(repr='b3lyp/6-31g(d,p)'), + ('sp',): Level(repr='b3lyp/6-311+g(d,p)')}} + + def build_scheduler(self, rxn, species_list, name): + """ + Build a testing Scheduler for a single reaction under the class adaptive levels. + + Args: + rxn (ARCReaction): The reaction. + species_list (list): The species list. + name (str): A unique project name. + + Returns: + Scheduler: The constructed (testing) scheduler. + """ + project_directory = os.path.join(ARC_PATH, 'Projects', f'{name}_delete') + self.addCleanup(shutil.rmtree, project_directory, ignore_errors=True) + return Scheduler(project=name, + ess_settings=self.ess_settings, + species_list=species_list, + rxn_list=[rxn], + opt_level=Level(repr='b3lyp/6-31g(d,p)'), + sp_level=Level(repr='b3lyp/6-311+g(d,p)'), + freq_level=Level(repr='b3lyp/6-31g(d,p)'), + adaptive_levels=self.adaptive_levels, + project_directory=project_directory, + job_types=initialize_job_types(), + testing=True) + + def test_bimolecular_creates_copies(self): + """Test that with thermo_at_own_level=True, wells on a different grain get relabeled copies""" + r = [ARCSpecies(label='CH4', smiles='C', thermo_at_own_level=True), + ARCSpecies(label='OH', smiles='[OH]', thermo_at_own_level=True)] + p = [ARCSpecies(label='CH3', smiles='[CH3]', thermo_at_own_level=True), + ARCSpecies(label='H2O', smiles='O', thermo_at_own_level=True)] + rxn = ARCReaction(label='CH4 + OH <=> CH3 + H2O', r_species=r, p_species=p) + sched = self.build_scheduler(rxn, r + p, 'adaptive_bimol') + + # The reaction is now defined with the copies, and stays self-consistent. + self.assertEqual(rxn.reactants, ['CH4_TS0', 'OH_TS0']) + self.assertEqual(rxn.products, ['CH3_TS0', 'H2O_TS0']) + self.assertEqual([s.label for s in rxn.r_species], ['CH4_TS0', 'OH_TS0']) + self.assertEqual(rxn.label, 'CH4_TS0 + OH_TS0 <=> CH3_TS0 + H2O_TS0') + rxn.check_attributes() # Must not raise. + + # The copies are autonomous, evaluated at the reaction-wide level, and not part of the thermo library. + for copy_label in ['CH4_TS0', 'OH_TS0', 'CH3_TS0', 'H2O_TS0']: + self.assertIn(copy_label, sched.species_dict) + self.assertIn(copy_label, sched.output) + copy_spc = sched.species_dict[copy_label] + self.assertEqual(copy_spc.adaptive_lot_n_heavy, 2) + self.assertFalse(copy_spc.compute_thermo) + self.assertFalse(copy_spc.include_in_thermo_lib) + + # The originals are untouched - they keep their own granular level and their own thermo. + for original_label in ['CH4', 'OH', 'CH3', 'H2O']: + original = sched.species_dict[original_label] + self.assertIsNone(original.adaptive_lot_n_heavy) + self.assertTrue(original.compute_thermo) + + def test_thermo_at_own_level_default_no_copy(self): + """Test that by default (thermo_at_own_level=False) the species itself takes the reaction-wide level, no copy""" + r = [ARCSpecies(label='CH4', smiles='C'), ARCSpecies(label='OH', smiles='[OH]')] + p = [ARCSpecies(label='CH3', smiles='[CH3]'), ARCSpecies(label='H2O', smiles='O')] + rxn = ARCReaction(label='CH4 + OH <=> CH3 + H2O', r_species=r, p_species=p) + sched = self.build_scheduler(rxn, r + p, 'adaptive_default') + + self.assertEqual(rxn.label, 'CH4 + OH <=> CH3 + H2O') + self.assertFalse(any('_TS' in label for label in sched.species_dict)) + self.assertEqual(sched.species_dict['CH4'].adaptive_lot_n_heavy, 2) + + def test_shared_species_across_grains_gets_copy(self): + """Test that a no-copy species shared by reactions on different grains gets a copy for the second reaction""" + oh = ARCSpecies(label='OH', smiles='[OH]') + h2o = ARCSpecies(label='H2O', smiles='O') + rxn1 = ARCReaction(label='CH4 + OH <=> CH3 + H2O', + r_species=[ARCSpecies(label='CH4', smiles='C'), oh], + p_species=[ARCSpecies(label='CH3', smiles='[CH3]'), h2o]) + rxn2 = ARCReaction(label='C3H8 + OH <=> nC3H7 + H2O', + r_species=[ARCSpecies(label='C3H8', smiles='CCC'), oh], + p_species=[ARCSpecies(label='nC3H7', smiles='[CH2]CC'), h2o]) + project_directory = os.path.join(ARC_PATH, 'Projects', 'adaptive_shared_delete') + self.addCleanup(shutil.rmtree, project_directory, ignore_errors=True) + species_list = rxn1.r_species + rxn1.p_species + [rxn2.r_species[0], rxn2.p_species[0]] + sched = Scheduler(project='adaptive_shared', + ess_settings=self.ess_settings, + species_list=species_list, + rxn_list=[rxn1, rxn2], + opt_level=Level(repr='b3lyp/6-31g(d,p)'), + sp_level=Level(repr='b3lyp/6-311+g(d,p)'), + freq_level=Level(repr='b3lyp/6-31g(d,p)'), + adaptive_levels={(1, 1): {('sp',): Level(repr='ccsd(t)-f12/cc-pvtz-f12')}, + (2, 3): {('sp',): Level(repr='dlpno-ccsd(t)/def2-tzvp')}, + (4, 'inf'): {('sp',): Level(repr='b3lyp/6-311+g(d,p)')}}, + project_directory=project_directory, + job_types=initialize_job_types(), + testing=True) + + # rxn1 (2 heavy atoms) set the shared wells' overrides; rxn1 itself is unchanged. + self.assertEqual(rxn1.label, 'CH4 + OH <=> CH3 + H2O') + self.assertEqual(sched.species_dict['OH'].adaptive_lot_n_heavy, 2) + # rxn2 (4 heavy atoms) lands on a different grain, so the shared wells got dedicated copies. + self.assertEqual(set(rxn2.reactants), {'C3H8', 'OH_TS1'}) + self.assertEqual(set(rxn2.products), {'nC3H7', 'H2O_TS1'}) + self.assertEqual(rxn2.label, + rxn2.arrow.join([rxn2.plus.join(rxn2.reactants), rxn2.plus.join(rxn2.products)])) + for copy_label in ['OH_TS1', 'H2O_TS1']: + self.assertEqual(sched.species_dict[copy_label].adaptive_lot_n_heavy, 4) + self.assertFalse(sched.species_dict[copy_label].compute_thermo) + # Unshared rxn2 wells just took the rxn2 override, no copies. + self.assertEqual(sched.species_dict['C3H8'].adaptive_lot_n_heavy, 4) + self.assertEqual(sched.species_dict['nC3H7'].adaptive_lot_n_heavy, 4) + + def test_unimolecular_no_copy(self): + """Test that a reaction whose well shares the reaction's grain gets no copies""" + r = [ARCSpecies(label='nC3H7', smiles='[CH2]CC')] + p = [ARCSpecies(label='iC3H7', smiles='C[CH]C')] + rxn = ARCReaction(label='nC3H7 <=> iC3H7', r_species=r, p_species=p) + sched = self.build_scheduler(rxn, r + p, 'adaptive_unimol') + + self.assertEqual(rxn.label, 'nC3H7 <=> iC3H7') + self.assertFalse(any('_TS' in label for label in sched.species_dict)) + self.assertIsNone(sched.species_dict['nC3H7'].adaptive_lot_n_heavy) + + def test_spawn_job_uses_override(self): + """Test that determine_adaptive_level is driven by adaptive_lot_n_heavy when set""" + spc = ARCSpecies(label='CH4', smiles='C') + sched = Scheduler(project='adaptive_override', + ess_settings=self.ess_settings, + species_list=[spc], + opt_level=Level(repr='b3lyp/6-31g(d,p)'), + sp_level=Level(repr='b3lyp/6-311+g(d,p)'), + freq_level=Level(repr='b3lyp/6-31g(d,p)'), + adaptive_levels=self.adaptive_levels, + project_directory=os.path.join(ARC_PATH, 'Projects', 'adaptive_override_delete'), + job_types=initialize_job_types(), + testing=True) + self.addCleanup(shutil.rmtree, os.path.join(ARC_PATH, 'Projects', 'adaptive_override_delete'), + ignore_errors=True) + original = Level(method='CBS-QB3') + # 1 heavy atom (own count) -> the (1, 1) grain. + self.assertEqual(sched.determine_adaptive_level(original, 'sp', spc.number_of_heavy_atoms).simple(), + 'ccsd(t)-f12/cc-pvtz-f12') + # With the override set to a 2-heavy-atom reaction, the (2, 'inf') grain is used instead. + spc.adaptive_lot_n_heavy = 2 + heavy = spc.adaptive_lot_n_heavy if spc.adaptive_lot_n_heavy is not None else spc.number_of_heavy_atoms + self.assertEqual(sched.determine_adaptive_level(original, 'sp', heavy).simple(), 'b3lyp/6-311+g(d,p)') + + if __name__ == '__main__': unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) diff --git a/arc/species/species.py b/arc/species/species.py index 420f84cd40..ce2f4d8f63 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -109,6 +109,11 @@ class ARCSpecies(object): bond_corrections (dict, optional): The bond additivity corrections (BAC) to be used. Determined from the structure if not directly given. compute_thermo (bool, optional): Whether to calculate thermodynamic properties for this species. + thermo_at_own_level (bool, optional): Relevant only when ``adaptive_levels`` are used. If ``True``, the + species' thermochemistry is computed at its own size-appropriate (granular) adaptive level even when it + participates in a reaction; the reaction's energetics then use a separate relabeled copy of the species + evaluated at the reaction-consistent level. If ``False`` (default), the species itself is evaluated at the + reaction-consistent (coarser) level and no copy is made. include_in_thermo_lib (bool, optional): Whether to include in the output RMG library. e0_only (bool, optional): Whether to only run statmech (w/o thermo) to compute E0. species_dict (dict, optional): A dictionary to create this object from (used when restarting ARC). @@ -231,6 +236,11 @@ class ARCSpecies(object): t1 (float): The T1 diagnostic parameter from Molpro. neg_freqs_trshed (list): A list of negative frequencies this species was troubleshooted for. compute_thermo (bool): Whether to calculate thermodynamic properties for this species. + thermo_at_own_level (bool): Whether to compute this species' thermochemistry at its own granular adaptive + level even when it participates in a reaction (see the constructor argument; relevant for ``adaptive_levels``). + adaptive_lot_n_heavy (int): An internal override for adaptive-levels selection - the heavy-atom count to key the + level by instead of this species' own count (set to the reaction-wide count for reaction participants). + ``None`` means use the species' own heavy-atom count. include_in_thermo_lib (bool): Whether to include in the output RMG library. e0_only (bool): Whether to only run statmech (w/o thermo) to compute E0. thermo (ThermoData): The thermo data calculated by ARC with 'H298' in kJ/mol and 'S298' in J/mol*K. @@ -307,6 +317,8 @@ def __init__(self, charge: int | None = None, checkfile: str | None = None, compute_thermo: bool | None = None, + thermo_at_own_level: bool = False, + adaptive_lot_n_heavy: int | None = None, include_in_thermo_lib: bool | None = True, consider_all_diastereomers: bool = True, directed_rotors: dict | None = None, @@ -409,6 +421,8 @@ def __init__(self, self.chosen_ts_method = None self.chosen_ts_list = list() self.compute_thermo = compute_thermo if compute_thermo is not None else not self.is_ts + self.thermo_at_own_level = thermo_at_own_level + self.adaptive_lot_n_heavy = adaptive_lot_n_heavy self.include_in_thermo_lib = include_in_thermo_lib self.e0_only = e0_only self.long_thermo_description = '' @@ -688,6 +702,10 @@ def as_dict(self, species_dict['charge'] = self.charge if not self.compute_thermo and not self.is_ts: species_dict['compute_thermo'] = self.compute_thermo + if self.thermo_at_own_level: + species_dict['thermo_at_own_level'] = self.thermo_at_own_level + if self.adaptive_lot_n_heavy is not None: + species_dict['adaptive_lot_n_heavy'] = self.adaptive_lot_n_heavy if not self.include_in_thermo_lib: species_dict['include_in_thermo_lib'] = self.include_in_thermo_lib species_dict['number_of_rotors'] = self.number_of_rotors @@ -884,6 +902,8 @@ def from_dict(self, species_dict): self.multi_species = species_dict['multi_species'] if 'multi_species' in species_dict else None self.charge = species_dict['charge'] if 'charge' in species_dict else 0 self.compute_thermo = species_dict['compute_thermo'] if 'compute_thermo' in species_dict else not self.is_ts + self.thermo_at_own_level = species_dict['thermo_at_own_level'] if 'thermo_at_own_level' in species_dict else False + self.adaptive_lot_n_heavy = species_dict['adaptive_lot_n_heavy'] if 'adaptive_lot_n_heavy' in species_dict else None self.include_in_thermo_lib = species_dict['include_in_thermo_lib'] if 'include_in_thermo_lib' in species_dict else True self.e0_only = species_dict['e0_only'] if 'e0_only' in species_dict else False self.number_of_radicals = species_dict['number_of_radicals'] if 'number_of_radicals' in species_dict else None diff --git a/arc/species/species_test.py b/arc/species/species_test.py index 9cd24fd4dc..67763c2c60 100644 --- a/arc/species/species_test.py +++ b/arc/species/species_test.py @@ -608,6 +608,26 @@ def test_as_dict(self): self.assertEqual(spc_dict['mol']['atoms'][0]['element']['isotope'], -1) self.assertEqual(spc_dict['mol']['atoms'][0]['atomtype'], 'Cs') + def test_thermo_at_own_level_round_trip(self): + """Test that thermo_at_own_level and adaptive_lot_n_heavy round-trip through as_dict/from_dict""" + # Defaults: not serialized, restored to False / None. + default_spc = ARCSpecies(label='ethane', smiles='CC') + default_dict = default_spc.as_dict() + self.assertNotIn('thermo_at_own_level', default_dict) + self.assertNotIn('adaptive_lot_n_heavy', default_dict) + restored_default = ARCSpecies(species_dict=default_dict) + self.assertFalse(restored_default.thermo_at_own_level) + self.assertIsNone(restored_default.adaptive_lot_n_heavy) + + # Non-default values: serialized and restored. + spc = ARCSpecies(label='ethane', smiles='CC', thermo_at_own_level=True, adaptive_lot_n_heavy=8) + spc_dict = spc.as_dict() + self.assertTrue(spc_dict['thermo_at_own_level']) + self.assertEqual(spc_dict['adaptive_lot_n_heavy'], 8) + restored = ARCSpecies(species_dict=spc_dict) + self.assertTrue(restored.thermo_at_own_level) + self.assertEqual(restored.adaptive_lot_n_heavy, 8) + def test_from_dict(self): """Test Species.from_dict()""" species_dict = self.spc2.as_dict() diff --git a/docs/source/advanced.rst b/docs/source/advanced.rst index cb62e2be06..89e9a4abcb 100644 --- a/docs/source/advanced.rst +++ b/docs/source/advanced.rst @@ -251,25 +251,44 @@ The method returns a dictionary containing the ``'e_o'`` tuple (electrons, orbit Adaptive levels of theory ^^^^^^^^^^^^^^^^^^^^^^^^^ ARC allows users to adapt the level of theory to the size of the molecule. -To do so, pass the ``adaptive_levels`` attribute, which is a dictionary of -levels of theory for ranges of the number of heavy (non-hydrogen) atoms in the -molecule. Keys are tuples of (``min_num_atoms``, ``max_num_atoms``), values are -dictionaries with the a tuple of job type as the key (``opt``, ``freq``, ``sp``, ``scan``) -and the respective level of theory as a string or a dictionary is the value. -Don't forget to bound the entire range between 1 and ``inf``, also make sure -there aren't any gaps in the heavy atom ranges. For example:: - - adaptive_levels = {(1, 5): {('opt', 'freq'): 'wb97xd/6-311+g(2d,2p)', - 'sp': 'ccsd(t)-f12/aug-cc-pvtz-f12'}, - (6, 15): {('opt', 'freq'): 'b3lyp/cbsb7', - 'sp': 'dlpno-ccsd(t)/def2-tzvp/c'}, - (16, 30): {('opt', 'freq'): 'b3lyp/6-31g(d,p)', - 'sp': 'wb97xd/6-311+g(2d,2p)'}, - (31, 'inf'): {('opt', 'freq'): 'b3lyp/6-31g(d,p)', - 'sp': 'b3lyp/6-311+g(d,p)'}} - -Note that job types which are not specified in ``adaptive_levels`` will use no-adaptive -(defined separately e.g., via ``opt_level``, or using ARC's defaults. +To do so, pass the ``adaptive_levels`` attribute, which is a list of entries. +Each entry is a dictionary with an ``atom_range`` 2-list giving the heavy +(non-hydrogen) atom count range (the upper bound may be ``inf``), and a ``levels`` +mapping from job type to the respective level of theory (a string or a dictionary). +Job types that share a level may be given as a single whitespace- or comma-separated +key (e.g. ``opt freq``). Don't forget to bound the entire range between 1 and +``inf``, and make sure there aren't any gaps in the heavy atom ranges. For example:: + + adaptive_levels: + - atom_range: [1, 5] + levels: + opt freq: wb97xd/6-311+g(2d,2p) + sp: ccsd(t)-f12/aug-cc-pvtz-f12 + - atom_range: [6, 15] + levels: + opt freq: b3lyp/cbsb7 + sp: dlpno-ccsd(t)/def2-tzvp/c + - atom_range: [16, 30] + levels: + opt freq: b3lyp/6-31g(d,p) + sp: wb97xd/6-311+g(2d,2p) + - atom_range: [31, inf] + levels: + opt freq: b3lyp/6-31g(d,p) + sp: b3lyp/6-311+g(d,p) + +Note that job types which are not specified in ``adaptive_levels`` will use non-adaptive +levels (defined separately, e.g., via ``opt_level``, or using ARC's defaults). + +When a species participates in a reaction, all of the reaction's species (the TS and the +reactant/product wells) are energy-evaluated at a single, reaction-consistent level keyed +by the largest participant (the TS), so the barrier is computed at one consistent level. +By default (the per-species ``thermo_at_own_level`` flag, ``False``) a well whose own size +falls on a finer grain than its reaction's is evaluated directly at the reaction-wide +(coarser) level, and its thermochemistry uses that same level. Set ``thermo_at_own_level=True`` on a species +to instead compute its thermochemistry at its own size-appropriate (granular) adaptive level: +an autonomous, relabeled copy of the species is then created and used by the reaction (at the +reaction-wide level), while the original keeps its own level for thermochemistry. Control job memory allocation