|
#################################################################################
# FOQUS Copyright (c) 2012 - 2023, by the software owners: Oak Ridge Institute
# for Science and Education (ORISE), TRIAD National Security, LLC., Lawrence
# Livermore National Security, LLC., The Regents of the University of
# California, through Lawrence Berkeley National Laboratory, Battelle Memorial
# Institute, Pacific Northwest Division through Pacific Northwest National
# Laboratory, Carnegie Mellon University, West Virginia University, Boston
# University, the Trustees of Princeton University, The University of Texas at
# Austin, URS Energy & Construction, Inc., et al. All rights reserved.
#
# Please see the file LICENSE.md for full copyright and license information,
# respectively. This file is also available online at the URL
# "https://github.com/CCSI-Toolset/FOQUS".
#################################################################################
| 15 | +import time |
| 16 | +from typing import Dict, List, Optional, Tuple, TypedDict, Union |
| 17 | + |
| 18 | +import dask.bag as db |
| 19 | +import numpy as np |
| 20 | +import pandas as pd # only used for the final output of criterion |
| 21 | +from scipy.stats import rankdata |
| 22 | + |
| 23 | +from .distance import compute_dist, compute_min_params |
| 24 | +from .nusf import compute_dmat, scale_xs, scale_y, update_min_dist |
| 25 | + |
| 26 | + |
def criterion(
    cand: pd.DataFrame,  # candidates
    args: TypedDict(
        "args",
        {
            "icol": str,
            "xcols": List,
            "wcol": str,
            "max_iterations": int,
            "mwr_values": List,
            "scale_method": str,
        },
    ),  # maximum number of iterations, mwr values, scale method, index types
    nr: int,  # number of restarts (each restart uses a random set of <nd> points)
    nd: int,  # design size <= len(candidates)
    mode: str = "maximin",
    hist: Optional[pd.DataFrame] = None,
    rand_seed: Union[int, None] = None,
) -> Dict:
    """Select a maximin design of <nd> points from <cand> for each mwr value.

    For every weight ratio in ``args["mwr_values"]`` this runs <nr> random
    restarts.  Each restart draws <nd> candidate rows without replacement,
    with selection probability proportional to the (scaled) weight column,
    then repeatedly calls ``update_min_dist`` (up to ``args["max_iterations"]``
    passes) to swap points while that improves the design.  The restart with
    the largest minimum pairwise distance wins; ties are broken by fewer tied
    minimum-distance pairs.  Restarts are evaluated through a dask bag.

    Args:
        cand: candidate points, one row per candidate.
        args: configuration mapping with keys ``"icol"`` (declared but not
            read in this function), ``"xcols"`` (input column names),
            ``"wcol"`` (weight column name), ``"max_iterations"``,
            ``"mwr_values"`` and ``"scale_method"``.
        nr: number of restarts (each restart uses a random set of <nd> points).
        nd: design size; must satisfy ``nd <= len(cand)``.
        mode: optimality criterion; only ``"maximin"`` is supported.
        hist: previously chosen points to include in distance computations
            (they are scaled together with the candidates but never selected).
        rand_seed: seed for reproducible restarts, or None for nondeterminism.

    Returns:
        Dict mapping each mwr value to a per-mwr results dict with keys
        ``best_cand_scaled``, ``best_cand`` (unscaled), ``best_index``,
        ``best_val`` (the best minimum distance), ``best_mdpts``,
        ``best_mties``, ``best_dmat``, ``mode``, ``design_size``,
        ``num_restarts``, ``mwr`` and ``elapsed_time``.
    """

    ncand = len(cand)
    if hist is not None:
        # nhist is only bound when history exists; used below to split the
        # jointly-scaled array back into candidate and history parts.
        nhist = len(hist)
    assert nd <= ncand, "Number of designs exceeds number of available candidates."

    mode = mode.lower()
    assert (
        mode == "maximin"
    ), "MODE {} not recognized for NUSF. Only MAXIMIN is currently supported.".format(
        mode
    )

    T = args["max_iterations"]
    mwr_vals = args["mwr_values"]
    scale_method = args["scale_method"]

    # index types: translate column names into positional indices so the
    # numpy-based helpers can address the x-columns and the weight column.
    idx_np = [cand.columns.get_loc(i) for i in args["xcols"]]
    idw_np = cand.columns.get_loc(args["wcol"])

    cand_np_ = cand.to_numpy()
    # Keep an unscaled copy so the winning rows can be reported in original
    # units by index lookup (no inverse transform needed).
    cand_np_unscaled = cand_np_.copy()

    # Combine candidates and history before scaling so both share the same
    # x-range; scale_xs returns the scaled array plus the min/max it used.
    if hist is None:
        cand_np_, _xmin, _xmax = scale_xs(cand_np_, idx_np)
    else:
        cand_np_, _xmin, _xmax = scale_xs(
            np.concatenate((cand_np_, hist.to_numpy())), idx_np
        )

    rand_gen = np.random.default_rng(rand_seed)

    def step(
        mwr_tuple: Tuple[int, List[int], np.ndarray, Union[np.ndarray, None]]
    ) -> Dict:
        # Run all <nr> restarts for a single mwr value and keep the best one.
        mwr, rands, cand_np, hist_np = mwr_tuple

        best_cand = []
        best_md = 0
        best_mties = 0
        best_index = []
        # NOTE(review): best_mdpts / best_dmat are only bound inside the
        # selection loop below; if no restart ever satisfies the update
        # condition (e.g. every md == 0), building `results` would raise
        # NameError — confirm md > 0 is guaranteed for valid inputs.

        t0 = time.time()

        def choose(rand_index):
            # One restart: extract the <nd> rows indexed by rand_index and
            # iteratively improve them by point swaps.
            rcand = cand_np[rand_index]
            dmat = compute_dmat(rcand, idx_np, idw_np, hist=hist_np)
            md, mdpts, mties = compute_min_params(dmat)

            update_ = True
            t = 0

            while update_ and (t < T):

                (
                    rcand_,
                    md_,
                    mdpts_,
                    mties_,
                    dmat_,
                    added_,
                    removed_,
                    update_,
                ) = update_min_dist(
                    rcand,
                    cand_np,
                    ncand,
                    idx_np,
                    idw_np,
                    md,
                    mdpts,
                    mties,
                    dmat,
                    hist=hist_np,
                    # NOTE(review): rand_index is an ndarray, so this passes an
                    # array-valued seed (accepted by numpy's default_rng);
                    # confirm update_min_dist expects that, not a scalar.
                    rand_seed=rand_seed + rand_index if rand_seed is not None else None,
                )

                if update_:
                    # Accept the improved design and its cached distance state.
                    rcand = rcand_
                    md = md_
                    mdpts = mdpts_
                    mties = mties_
                    dmat = dmat_
                    # NOTE(review): truthiness test — this is False when
                    # added_ == 0 (candidate index zero) as well as when it is
                    # None/empty; confirm update_min_dist never reports a swap
                    # with index 0, otherwise this should test `is not None`.
                    if added_:
                        rand_index[removed_] = added_

                t += 1
            return {
                "index": rand_index,
                "cand": rcand,
                "md": md,
                "mdpts": mdpts,
                "mties": mties,
                "dmat": dmat,
            }

        # Evaluate every restart (lazily, via dask) and keep the best design:
        # larger minimum distance wins; equal distance with fewer ties wins.
        for x in db.from_sequence(rands).map(choose):
            if (x["md"] > best_md) or (
                (x["md"] == best_md) and (x["mties"] < best_mties)
            ):
                best_index = x["index"]
                best_cand = x["cand"]
                best_md = x["md"]
                best_mdpts = x["mdpts"]
                best_mties = x["mties"]
                best_dmat = x["dmat"]
            print("Best minimum distance for this random start: {}".format(best_md))

        elapsed_time = time.time() - t0

        # no need to inverse-scale; can just use the indices to look up original rows in cand_
        best_cand_unscaled = cand_np_unscaled[best_index]

        best_cand = pd.DataFrame(best_cand, index=best_index, columns=list(cand))
        best_cand_unscaled = pd.DataFrame(
            best_cand_unscaled, index=best_index, columns=list(cand)
        )

        results = {
            "best_cand_scaled": best_cand,
            "best_cand": best_cand_unscaled,
            "best_index": best_index,
            "best_val": best_md,
            "best_mdpts": best_mdpts,
            "best_mties": best_mties,
            "best_dmat": best_dmat,
            "mode": mode,
            "design_size": nd,
            "num_restarts": nr,
            "mwr": mwr,
            "elapsed_time": elapsed_time,
        }

        return results

    # Precompute, per mwr value: the y-scaled candidate array, the split-off
    # history rows, and <nr> weighted random index draws.
    rands, cand_nps, hist_nps = [], [], []
    for mwr in mwr_vals:
        cand_np = scale_y(scale_method, mwr, cand_np_, idw_np)

        if hist is None:
            hist_np = None
        else:
            # The scaled array holds candidates first, then history rows.
            hist_np = cand_np[-nhist:]
            cand_np = cand_np[:ncand]

        # Selection probability proportional to each candidate's scaled weight.
        wts = cand_np[:, idw_np]
        wts_sum = np.sum(wts)
        prob = wts / wts_sum
        rand = []
        for i in range(nr):
            # sample without replacement <nd> indices
            rand.append(rand_gen.choice(ncand, nd, replace=False, p=prob))
        rands.append(rand)
        cand_nps.append(cand_np)
        hist_nps.append(hist_np)

    # One `step` task per mwr value; compute() materializes all results.
    results = db.from_sequence(zip(mwr_vals, rands, cand_nps, hist_nps)).map(step)
    return dict(zip(mwr_vals, results.compute()))
0 commit comments