@@ -19,7 +19,8 @@ def validate_for_scaling(array_in, lo, hi) -> None:
1919 if lo == hi :
2020 raise ValueError ("Array must contain non-identical values" )
2121 if not check_under_or_overflow (array_in ):
22- raise ValueError ("Array contains under/overflow values for dtype" )
22+ raise ValueError ("Array contains under/overflow values for dtype" )
23+
2324
2425def check_under_or_overflow (arr ):
2526 if np .issubdtype (arr .dtype , np .integer ):
@@ -28,7 +29,7 @@ def check_under_or_overflow(arr):
2829 info = np .finfo (arr .dtype )
2930 else :
3031 raise ValueError ("Unsupported data type" )
31- max_value = info .max
32+ max_value = info .max
3233 min_value = info .min
3334 return np .all (arr < max_value ) & np .all (arr > min_value )
3435
@@ -45,19 +46,20 @@ def scale_linear(array_in, lo=None, hi=None):
4546 result = (array_in - lo ) / (hi - lo )
4647 return result
4748
49+
def scale_log(array_in, lo=None, hi=None):
    """Scale ``array_in`` logarithmically (base 10) relative to ``lo`` and ``hi``.

    Computes ``(log10(x) - log10(lo)) / (log10(hi) - log10(lo))``, so ``lo``
    maps to 0 and ``hi`` maps to 1.

    Args:
        array_in: Array of strictly positive values.
        lo: Lower bound; defaults to ``np.min(array_in)``.
        hi: Upper bound; defaults to ``np.max(array_in)``.

    Returns:
        The scaled array.

    Raises:
        ValueError: If any element of ``array_in`` is <= 0, if ``lo`` or
            ``hi`` is <= 0, or if ``validate_for_scaling`` rejects the input
            (e.g. ``lo == hi``).
    """
    # The logarithm is only defined for strictly positive values.
    if np.any(array_in <= 0):
        raise ValueError("All values must be > 0 to use scale_log")
    if lo is None:
        lo = np.min(array_in)
    if hi is None:
        hi = np.max(array_in)
    # Explicitly supplied bounds go through log10 as well; without this check
    # a non-positive bound silently produces NaN/inf in the result.
    if lo <= 0 or hi <= 0:
        raise ValueError("lo and hi must be > 0 to use scale_log")
    validate_for_scaling(array_in, lo, hi)
    log_lo = np.log10(lo)
    return (np.log10(array_in) - log_lo) / (np.log10(hi) - log_lo)
6061
62+
6163def scale_log2 (array_in , lo = None , hi = None ):
6264 if lo is None :
6365 lo = np .min (array_in )
@@ -66,71 +68,76 @@ def scale_log2(array_in, lo=None, hi=None):
6668 validate_for_scaling (array_in , lo , hi )
6769 result = np .log10 (9 * (array_in - lo ) / (hi - lo ) + 1 )
6870 return result
71+
72+
6973# fix expected values in test
7074
75+
def scale_power(array_in, lo=None, hi=None):
    """Scale ``array_in`` exponentially (base 10) relative to ``lo`` and ``hi``.

    Computes ``(10**x - 10**lo) / (10**hi - 10**lo)``, so ``lo`` maps to 0
    and ``hi`` maps to 1.

    Args:
        array_in: Array of values to scale.
        lo: Lower bound; defaults to ``np.min(array_in)``.
        hi: Upper bound; defaults to ``np.max(array_in)``.

    Returns:
        The scaled array.

    Raises:
        ValueError: If ``validate_for_scaling`` rejects the input
            (e.g. ``lo == hi``).
    """
    if lo is None:
        lo = np.min(array_in)
    if hi is None:
        hi = np.max(array_in)
    validate_for_scaling(array_in, lo, hi)
    # Use a float base: with an integer base, NumPy raises ValueError for
    # negative integer exponents and wraps silently on integer overflow.
    low_pow = np.power(10.0, lo)
    high_pow = np.power(10.0, hi)
    return (np.power(10.0, array_in) - low_pow) / (high_pow - low_pow)
7986
87+
def scale_power2(array_in, lo=None, hi=None):
    """Scale ``array_in`` with the curve ``(10**t - 1) / 9``.

    ``t`` is the linear position of each value between ``lo`` and ``hi``,
    i.e. ``(x - lo) / (hi - lo)``; ``lo`` maps to 0 and ``hi`` maps to 1.

    Args:
        array_in: Array of values to scale.
        lo: Lower bound; defaults to ``np.min(array_in)``.
        hi: Upper bound; defaults to ``np.max(array_in)``.

    Returns:
        The scaled array.
    """
    lo = np.min(array_in) if lo is None else lo
    hi = np.max(array_in) if hi is None else hi
    validate_for_scaling(array_in, lo, hi)
    fraction = (array_in - lo) / (hi - lo)
    return 1 / 9 * (np.power(10, fraction) - 1)
9096
97+
def unscale_linear(array_in, lo, hi):
    """Map linearly scaled values back onto the ``[lo, hi]`` range.

    Args:
        array_in: Scaled values (0 corresponds to ``lo``, 1 to ``hi``).
        lo: Lower bound of the original range.
        hi: Upper bound of the original range.

    Returns:
        ``array_in * (hi - lo) + lo``, promoted to floating point.
    """
    span = hi - lo
    # Dividing by 1.0 forces a floating-point result even for integer input.
    stretched = array_in * span / 1.0
    return stretched + lo
94101
102+
def unscale_log(array_in, lo, hi):
    """Map log-scaled values back onto the ``[lo, hi]`` range.

    Evaluates ``lo * (hi / lo) ** array_in``, so 0 maps to ``lo`` and 1 maps
    to ``hi``.

    Args:
        array_in: Scaled values.
        lo: Lower bound of the original range.
        hi: Upper bound of the original range.

    Returns:
        The unscaled array.
    """
    ratio = hi / lo
    return lo * np.power(ratio, array_in)
107115
116+
def unscale_log2(array_in, lo=None, hi=None):
    """Map values scaled with ``log10(9 * t + 1)`` back onto ``[lo, hi]``.

    Evaluates ``(10 ** array_in - 1) * (hi - lo) / 9 + lo``, so 0 maps to
    ``lo`` and 1 maps to ``hi``.

    Args:
        array_in: Scaled values.
        lo: Lower bound of the original range. Required despite the ``None``
            default, which is kept only for signature compatibility.
        hi: Upper bound of the original range. Required, as for ``lo``.

    Returns:
        The unscaled array.

    Raises:
        TypeError: If ``lo`` or ``hi`` is ``None``.
    """
    # The bounds cannot be inferred from already-scaled data, so fail with a
    # clear message instead of an opaque NoneType arithmetic error.
    if lo is None or hi is None:
        raise TypeError("unscale_log2 requires both lo and hi")
    return (np.power(10, array_in / 1.0) - 1) * (hi - lo) / 9.0 + lo
117124
125+
def unscale_power(array_in, lo, hi):
    """Map power-scaled values back onto the ``[lo, hi]`` range.

    Evaluates ``log10(array_in * (10**hi - 10**lo) + 10**lo)``, so 0 maps to
    ``lo`` and 1 maps to ``hi``.

    Args:
        array_in: Scaled values.
        lo: Lower bound of the original range.
        hi: Upper bound of the original range.

    Returns:
        The unscaled array.
    """
    # Use a float base: with an integer base, NumPy raises ValueError for
    # negative integer bounds and wraps silently on integer overflow.
    low_pow = np.power(10.0, lo)
    high_pow = np.power(10.0, hi)
    return np.log10(array_in * (high_pow - low_pow) + low_pow)
127134
135+
def unscale_power2(array_in, lo, hi):
    """Map values scaled with ``(10**t - 1) / 9`` back onto ``[lo, hi]``.

    Evaluates ``log10(9 * array_in + 1) * (hi - lo) + lo``, so 0 maps to
    ``lo`` and 1 maps to ``hi``.

    Args:
        array_in: Scaled values.
        lo: Lower bound of the original range.
        hi: Upper bound of the original range.

    Returns:
        The unscaled array.
    """
    fraction = np.log10(9.0 * array_in / 1.0 + 1)
    span = hi - lo
    return fraction * span + lo
133139
140+
134141class BaseScaler :
135142 # def __init__(self, data_array: np.ndarray):
136143 # self.data = data_array
@@ -143,11 +150,7 @@ def fit(self, X: np.ndarray):
143150 return self
144151
145152 def fit_transform (self , X : np .ndarray ) -> np .ndarray :
146- return (
147- self
148- .fit (X )
149- .transform (X )
150- )
153+ return self .fit (X ).transform (X )
151154
152155 def transform (self , X : np .ndarray ) -> np .ndarray :
153156 raise NotImplementedError
@@ -162,44 +165,50 @@ def transform(self, X: np.ndarray) -> np.ndarray:
162165
163166 def inverse_transform (self , X : np .ndarray ) -> np .ndarray :
164167 return unscale_linear (X , self .lo_ , self .hi_ )
165-
168+
169+
class LogScaler(BaseScaler):
    """Scaler that delegates to ``scale_log`` / ``unscale_log``."""

    # NOTE(review): lo_ and hi_ are presumably populated by fit() -- confirm.

    def transform(self, X: np.ndarray) -> np.ndarray:
        """Return ``X`` scaled with ``scale_log`` using the stored bounds."""
        lower, upper = self.lo_, self.hi_
        return scale_log(X, lower, upper)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Return ``X`` unscaled with ``unscale_log`` using the stored bounds."""
        lower, upper = self.lo_, self.hi_
        return unscale_log(X, lower, upper)
172176
177+
class LogScaler2(BaseScaler):
    """Scaler that delegates to ``scale_log2`` / ``unscale_log2``."""

    # NOTE(review): lo_ and hi_ are presumably populated by fit() -- confirm.

    def transform(self, X: np.ndarray) -> np.ndarray:
        """Return ``X`` scaled with ``scale_log2`` using the stored bounds."""
        lower, upper = self.lo_, self.hi_
        return scale_log2(X, lower, upper)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Return ``X`` unscaled with ``unscale_log2`` using the stored bounds."""
        lower, upper = self.lo_, self.hi_
        return unscale_log2(X, lower, upper)
179184
185+
class PowerScaler(BaseScaler):
    """Scaler that delegates to ``scale_power`` / ``unscale_power``."""

    # NOTE(review): lo_ and hi_ are presumably populated by fit() -- confirm.

    def transform(self, X: np.ndarray) -> np.ndarray:
        """Return ``X`` scaled with ``scale_power`` using the stored bounds."""
        lower, upper = self.lo_, self.hi_
        return scale_power(X, lower, upper)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Return ``X`` unscaled with ``unscale_power`` using the stored bounds."""
        lower, upper = self.lo_, self.hi_
        return unscale_power(X, lower, upper)
186192
193+
class PowerScaler2(BaseScaler):
    """Scaler that delegates to ``scale_power2`` / ``unscale_power2``."""

    # NOTE(review): lo_ and hi_ are presumably populated by fit() -- confirm.

    def transform(self, X: np.ndarray) -> np.ndarray:
        """Return ``X`` scaled with ``scale_power2`` using the stored bounds."""
        lower, upper = self.lo_, self.hi_
        return scale_power2(X, lower, upper)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Return ``X`` unscaled with ``unscale_power2`` using the stored bounds."""
        lower, upper = self.lo_, self.hi_
        return unscale_power2(X, lower, upper)
193200
201+
# Registry mapping a display name to a scaler instance. Each instance is
# constructed once here, so every lookup of a given name returns the same
# object.
map_name_to_scaler = {
    label: scaler_cls()
    for label, scaler_cls in (
        ("Linear", LinearScaler),
        ("Log", LogScaler),
        ("Log2", LogScaler2),
        ("Power", PowerScaler),
        ("Power2", PowerScaler2),
    )
}
202210
211+
203212def scale_dataframe (df : pd .DataFrame , scaler : BaseScaler ) -> Tuple [pd .DataFrame , dict ]:
204213 scaled_df = pd .DataFrame (np .nan , columns = df .columns , index = df .index )
205214 bounds = {}
0 commit comments