Skip to content

Commit 3e4ff22

Browse files
committed
Implementing scaling.py functions in plugins
1 parent bcf7593 commit 3e4ff22

4 files changed

Lines changed: 117 additions & 52 deletions

File tree

foqus_lib/framework/surrogate/keras_nn.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from pathlib import Path
4242
from tokenize import String
4343

44+
from typing import Tuple
4445
import numpy as np
4546
import pandas as pd
4647
import tensorflow as tf # pylint: disable=import-error
@@ -52,6 +53,19 @@
5253
from foqus_lib.framework.surrogate.surrogate import surrogate
5354
from foqus_lib.framework.uq.SurrogateParser import SurrogateParser
5455

56+
from foqus_lib.framework.surrogate.scaling import (
57+
BaseScaler,
58+
LinearScaler,
59+
LogScaler,
60+
LogScaler2,
61+
PowerScaler,
62+
PowerScaler2,
63+
map_name_to_scaler,
64+
scale_dataframe
65+
)
66+
67+
# mapping between the human-readable name for the scaling variant
68+
# and an instance of the corresponding scaler class
5569

5670
# custom class to define Keras NN layers
5771
@tf.keras.utils.register_keras_serializable()
@@ -293,6 +307,14 @@ def __init__(self, dat=None):
293307
desc="Name of output file for model, should have file extension: .keras",
294308
hint="Enter a custom file name if desired",
295309
)
310+
# add option for normalization_form, make dropdown option
311+
self.options.add(
312+
name="scaling_function",
313+
default="Linear",
314+
dtype=str,
315+
desc="Scaling/normalization function for input data",
316+
validValues=list(map_name_to_scaler.keys()),
317+
)
296318

297319
def run(self):
298320
"""
@@ -316,6 +338,9 @@ def run(self):
316338
self.msgQueue.put(f"input data columns: {input_data.columns}")
317339
self.msgQueue.put(f"output data columns: {output_data.columns}")
318340

341+
# extract scaling function option, apply it to the input data
342+
# get scaler object
343+
319344
# np.random.seed(46)
320345
# rn.seed(1342)
321346
# tf.random.set_seed(62)
@@ -341,22 +366,13 @@ def run(self):
341366
xdata = input_data
342367
zdata = output_data
343368

344-
xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata} # x bounds
345-
zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata} # z bounds
346-
347-
# normalize data using Linear form
348-
# users can normalize with any allowed form # manually, and then pass the
349-
# appropriate flag to FOQUS from the allowed list:
350-
# ["Linear", "Log", "Power", "Log 2", "Power 2"] - see the documentation for
351-
# details on the scaling formulations
352-
xmax, xmin = xdata.max(axis=0), xdata.min(axis=0)
353-
zmax, zmin = zdata.max(axis=0), zdata.min(axis=0)
354-
xdata, zdata = np.array(xdata), np.array(zdata)
355-
for i in range(len(xdata)):
356-
for j in range(len(xlabels)):
357-
xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j])
358-
for j in range(len(zlabels)):
359-
zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j])
369+
scaling_func_option = self.options["scaling_function"].value
370+
371+
scaler_instance = map_name_to_scaler[scaling_func_option]
372+
xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance)
373+
zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance)
374+
375+
print(f"using scaling function: {scaling_func_option}")
360376

361377
# method to create model
362378
def create_model():
@@ -370,7 +386,7 @@ def create_model():
370386
input_bounds=xdata_bounds,
371387
output_bounds=zdata_bounds,
372388
normalized=True,
373-
normalization_form="Linear",
389+
normalization_form=scaling_func_option,
374390
)
375391

376392
outputs = layers(inputs) # use network as function outputs = f(inputs)

foqus_lib/framework/surrogate/pytorch_nn.py

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,16 @@
5050
# from foqus_lib.framework.graph.graph import Graph
5151
from foqus_lib.framework.surrogate.surrogate import surrogate
5252
from foqus_lib.framework.uq.SurrogateParser import SurrogateParser
53+
from foqus_lib.framework.surrogate.scaling import (
54+
BaseScaler,
55+
LinearScaler,
56+
LogScaler,
57+
LogScaler2,
58+
PowerScaler,
59+
PowerScaler2,
60+
map_name_to_scaler,
61+
scale_dataframe
62+
)
5363

5464
# custom class to define Keras NN layers
5565
np.random.seed(46)
@@ -284,6 +294,13 @@ def __init__(self, dat=None):
284294
desc="Name of output file for model, should have file extension: .pt",
285295
hint="Enter a custom file name if desired",
286296
)
297+
self.options.add(
298+
name="scaling_function",
299+
default="Linear",
300+
dtype=str,
301+
desc="Scaling/normalization function for input data",
302+
validValues=["Linear", "Log", "Log2", "Power", "Power2"],
303+
)
287304

288305
def run(self):
289306
"""
@@ -326,22 +343,16 @@ def run(self):
326343
zlabels = list(output_data.columns)
327344
xdata = input_data
328345
zdata = output_data
329-
xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata} # x bounds
330-
zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata} # z bounds
331-
332-
# normalize data using Linear form, pass as custom string and parse with SymPy
333-
# users can normalize with any allowed form # manually, and then pass the
334-
# appropriate flag to FOQUS from the allowed list:
335-
# ["Linear", "Log", "Power", "Log 2", "Power 2", "Custom] - see the
336-
# documentation for details on the scaling formulations
337-
xmax, xmin = xdata.max(axis=0), xdata.min(axis=0)
338-
zmax, zmin = zdata.max(axis=0), zdata.min(axis=0)
339-
xdata, zdata = np.array(xdata), np.array(zdata)
340-
for i in range(len(xdata)):
341-
for j in range(len(xlabels)):
342-
xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j])
343-
for j in range(len(zlabels)):
344-
zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j])
346+
# xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata} # x bounds
347+
# zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata} # z bounds
348+
349+
scaling_func_option = self.options["scaling_function"].value
350+
351+
scaler_instance = map_name_to_scaler[scaling_func_option]
352+
xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance)
353+
zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance)
354+
355+
print(f"using scaling function: {scaling_func_option}")
345356

346357
model_data = np.concatenate(
347358
(xdata, zdata), axis=1
@@ -353,8 +364,11 @@ def run(self):
353364

354365
# raise exception here after BPC position
355366
# create model
356-
x_train = torch.from_numpy(xdata).float().to(device)
357-
z_train = torch.from_numpy(zdata).float().to(device)
367+
368+
# need to convert xdata to a numpy array for the below to work
369+
# otherwise causes TypeError: expected np.ndarray (got DataFrame)
370+
x_train = torch.from_numpy(xdata.to_numpy()).float().to(device)
371+
z_train = torch.from_numpy(zdata.to_numpy()).float().to(device)
358372

359373
# print type at this point
360374
# can also print inside create_model

foqus_lib/framework/surrogate/scaling.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from collections import OrderedDict
66

77
import numpy as np
8+
import pandas as pd
9+
from typing import Tuple
810

911

1012
def validate_for_scaling(array_in, lo, hi) -> None:
@@ -130,6 +132,10 @@ def unscale_power2(array_in, lo, hi):
130132
return result
131133

132134
class BaseScaler:
135+
# def __init__(self, data_array: np.ndarray):
136+
# self.data = data_array
137+
# self.lo_ = np.min(data_array)
138+
# self.hi_ = np.max(data_array)
133139

134140
def fit(self, X: np.ndarray):
135141
self.lo_ = np.min(X)
@@ -184,4 +190,24 @@ def transform(self, X: np.ndarray) -> np.ndarray:
184190

185191
def inverse_transform(self, X: np.ndarray) -> np.ndarray:
186192
return unscale_power2(X, self.lo_, self.hi_)
187-
193+
194+
# Mapping from the human-readable scaling-variant name (offered to users
# as the "scaling_function" dropdown option in the surrogate plugins) to a
# shared instance of the corresponding scaler class.
# NOTE(review): these are module-level singletons — fitting one mutates its
# lo_/hi_ attributes in place, so callers must read the fitted bounds
# immediately after each fit (as scale_dataframe below does).
map_name_to_scaler = {
    "Linear": LinearScaler(),
    "Log": LogScaler(),
    "Log2": LogScaler2(),
    "Power": PowerScaler(),
    "Power2": PowerScaler2(),
}
def scale_dataframe(df: pd.DataFrame, scaler: "BaseScaler") -> Tuple[pd.DataFrame, dict]:
    """Scale each column of *df* independently with *scaler*.

    The scaler is re-fit on every column in turn, so a single shared
    instance (such as the singletons in ``map_name_to_scaler``) may be
    passed; the fitted ``lo_``/``hi_`` attributes are captured per column
    immediately after each fit.

    Args:
        df: input data; one scaling is computed per column.
        scaler: object exposing ``fit_transform(X)`` that sets the fitted
            bounds as ``lo_`` and ``hi_`` attributes.

    Returns:
        A tuple ``(scaled_df, bounds)`` where ``scaled_df`` has the same
        columns and index as *df*, and ``bounds`` maps each column name to
        its ``(lo, hi)`` tuple used for scaling.
    """
    scaled_columns = {}
    bounds = {}

    for col_name in df:
        # fit_transform() overwrites scaler.lo_/hi_ for each column, so
        # record the bounds right away, before the next column clobbers them.
        scaled_columns[col_name] = scaler.fit_transform(df[col_name])
        bounds[col_name] = scaler.lo_, scaler.hi_

    # Build the result in one shot rather than filling a NaN-initialized
    # frame column by column; passing columns/index explicitly preserves
    # them even when df has no columns or no rows.
    scaled_df = pd.DataFrame(scaled_columns, columns=df.columns, index=df.index)
    return scaled_df, bounds

foqus_lib/framework/surrogate/scikit_nn.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@
5252
from foqus_lib.framework.surrogate.surrogate import surrogate
5353
from foqus_lib.framework.uq.SurrogateParser import SurrogateParser
5454

55+
from foqus_lib.framework.surrogate.scaling import (
56+
BaseScaler,
57+
LinearScaler,
58+
LogScaler,
59+
LogScaler2,
60+
PowerScaler,
61+
PowerScaler2,
62+
map_name_to_scaler,
63+
scale_dataframe
64+
)
5565

5666
def validate_training_data(xdata: np.ndarray, zdata: np.ndarray):
5767
number_columns_in_xdata = xdata.shape[1]
@@ -250,6 +260,14 @@ def __init__(self, dat=None):
250260
hint="Enter a custom file name if desired",
251261
)
252262

263+
self.options.add(
264+
name="scaling_function",
265+
default="Linear",
266+
dtype=str,
267+
desc="Scaling/normalization function for input data",
268+
validValues=["Linear", "Log", "Log2", "Power", "Power2"],
269+
)
270+
253271
def run(self):
254272
"""
255273
This function overloads the Thread class function,
@@ -300,22 +318,13 @@ def run(self):
300318
xdata = input_data
301319
zdata = output_data
302320

303-
xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata} # x bounds
304-
zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata} # z bounds
305-
306-
# normalize data using Linear form, pass as custom string and parse with SymPy
307-
# users can normalize with any allowed form # manually, and then pass the
308-
# appropriate flag to FOQUS from the allowed list:
309-
# ["Linear", "Log", "Power", "Log 2", "Power 2", "Custom] - see the
310-
# documentation for details on the scaling formulations
311-
xmax, xmin = xdata.max(axis=0), xdata.min(axis=0)
312-
zmax, zmin = zdata.max(axis=0), zdata.min(axis=0)
313-
xdata, zdata = np.array(xdata), np.array(zdata)
314-
for i in range(len(xdata)):
315-
for j in range(len(xlabels)):
316-
xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j])
317-
for j in range(len(zlabels)):
318-
zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j])
321+
scaling_func_option = self.options["scaling_function"].value
322+
323+
scaler_instance = map_name_to_scaler[scaling_func_option]
324+
xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance)
325+
zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance)
326+
327+
print(f"using scaling function: {scaling_func_option}")
319328

320329
model_data = np.concatenate(
321330
(xdata, zdata), axis=1

0 commit comments

Comments
 (0)