Commit 2abbe2f

clean again

1 parent 5db08d4 commit 2abbe2f

10 files changed

Lines changed: 72 additions & 70 deletions

.idea/.gitignore

Lines changed: 8 additions & 0 deletions

.idea/Optimal-Energy-System-Scheduling-Combining-Mixed-Integer-Programming-and-Deep-Reinforcement-Learning.iml

Lines changed: 12 additions & 0 deletions

.idea/deployment.xml

Lines changed: 21 additions & 0 deletions

.idea/inspectionProfiles/profiles_settings.xml

Lines changed: 6 additions & 0 deletions

.idea/misc.xml

Lines changed: 4 additions & 0 deletions

.idea/modules.xml

Lines changed: 8 additions & 0 deletions

.idea/vcs.xml

Lines changed: 6 additions & 0 deletions

(The seven .idea/ files above are IDE-generated and are not rendered by default.)

MIP_DQN.py

Lines changed: 1 addition & 20 deletions
@@ -7,8 +7,7 @@
 import pyomo.environ as pyo
 import pyomo.kernel as pmo
 from omlt import OmltBlock
-import gurobipy as gp
-from gurobipy import GRB
+
 from gurobipy import *
 from omlt.neuralnet import NetworkDefinition, FullSpaceNNFormulation,ReluBigMFormulation
 from omlt.io.onnx import write_onnx_model_with_bounds,load_onnx_neural_network_with_bounds
@@ -18,7 +17,6 @@
 from copy import deepcopy
 import wandb
 from random_generator_battery import ESSEnv
-from tools import test_one_episode
 ## define net
 class ReplayBuffer:
     def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
@@ -58,9 +56,6 @@ def extend_buffer(self, state, other): # CPU array to CPU array
         self.next_idx = next_idx

     def sample_batch(self, batch_size) -> tuple:
-        '''get reward, mask, action, state, next_state,
-        actually, next_state is calculated based on state_indice,
-        we need to randomly choose more blocks, instead of justing random choose state'''
         indices = rd.randint(self.now_len - 1, size=batch_size)
         r_m_a = self.buf_other[indices]
         return (r_m_a[:, 0:1],
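The deleted docstring described what sample_batch returns, and the hunk cuts the return statement short. For reference, `buf_other` packs (reward, mask, action) per transition, and the standard completion of this buffer design pairs each sampled index with the stored state at index + 1; a sketch (the `buf_state` name and the tail of the tuple are assumptions, not lines from this diff):

    def sample_batch(self, batch_size) -> tuple:
        # Uniform random sampling; indices stop at now_len - 1 so that
        # index + 1 is always a valid next-state lookup.
        indices = rd.randint(self.now_len - 1, size=batch_size)
        r_m_a = self.buf_other[indices]
        return (r_m_a[:, 0:1],                # reward
                r_m_a[:, 1:2],                # mask, typically (1 - done) * gamma
                r_m_a[:, 2:],                 # action
                self.buf_state[indices],      # state
                self.buf_state[indices + 1])  # next state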
@@ -72,23 +67,19 @@ def sample_batch(self, batch_size) -> tuple:
     def update_now_len(self):
         self.now_len = self.max_len if self.if_full else self.next_idx
 class Arguments:
-    '''revise here for our own purpose'''
     def __init__(self, agent=None, env=None):

         self.agent = agent # Deep Reinforcement Learning algorithm
         self.env = env # the environment for training
-        self.plot_shadow_on=False# control do we need to plot all shadow figures
         self.cwd = None # current work directory. None means set automatically
         self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
-        # self.replace_train_data=True
         self.visible_gpu = '0,1,2,3' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
         self.worker_num = 2 # rollout workers number pre GPU (adjust it to get high GPU usage)
         self.num_threads = 8 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)

         '''Arguments for training'''
         self.num_episode=3000
         self.gamma = 0.995 # discount factor of future rewards
-        # self.reward_scale = 1 # an approximate target reward usually be closed to 256
         self.learning_rate = 1e-4 # 2 ** -14 ~= 6e-5
         self.soft_update_tau = 1e-2 # 2 ** -8 ~= 5e-3

@@ -97,7 +88,6 @@ def __init__(self, agent=None, env=None):
         self.repeat_times = 2 ** 3 # repeatedly update network to keep critic's loss small
         self.target_step = 1000 # collect target_step experiences, then update network, 1024
         self.max_memo = 50000 # capacity of replay buffer
-        self.if_per_or_gae = False # PER for off-policy sparse reward: Prioritized Experience Replay.
         ## arguments for controlling exploration
         self.explorate_decay=0.99
         self.explorate_min=0.3
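`explorate_decay` and `explorate_min` drive the decayed exploration invoked later in this file via `agent._update_exploration_rate(args.explorate_decay, args.explorate_min)`. The method itself is outside this diff; a minimal sketch of what such an update usually does, assuming the agent stores its current rate in an `explore_rate` attribute:

    def _update_exploration_rate(self, explorate_decay, explorate_min):
        # Geometric decay with a floor: 0.99x per call, never below 0.3
        # with the defaults above. (Sketch; not the repo's exact code.)
        self.explore_rate = max(self.explore_rate * explorate_decay, explorate_min)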
@@ -152,8 +142,6 @@ def __init__(self,mid_dim,state_dim,action_dim):
         self.net_q2=nn.Sequential(nn.Linear(mid_dim,mid_dim),nn.ReLU(),
                                   nn.Linear(mid_dim,1))# we get q2 value
     def forward(self,value):
-        '''we change state,action to value because when we use this part to build our MIP formulation and
-        omlt could not directly build with two inputs'''
         mid=self.net_head(value)
         return self.net_q1(mid)
     def get_q1_q2(self,value):
@@ -172,7 +160,6 @@ def __init__(self):
         self.criterion = torch.nn.SmoothL1Loss()

     def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
-        # explict call self.init() for multiprocessing
         self.device = torch.device(
             f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
         self.action_dim = action_dim
@@ -275,7 +262,6 @@ def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
         q1, q2 = self.cri.get_q1_q2(torch.cat((state, action),dim=-1))
         obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label) # twin critics
         return obj_critic, state
-        # set replay buffer



@@ -310,13 +296,10 @@ def get_episode_return(env, act, device):
         if done:
             break
     return episode_return,episode_unbalance,episode_operation_cost
-## define MIP
 class Actor_MIP:
     '''this actor is used to get the best action and Q function, the only input should be batch tensor state, action, and network, while the output should be
     batch tensor max_action, batch tensor max_Q'''
     def __init__(self,scaled_parameters,batch_size,net,state_dim,action_dim,env,constrain_on=False):
-        # self.input_batch_state = input_batch_state
-        # self.input_batch_action = input_batch_action
         self.batch_size = batch_size
         self.net = net
         self.state_dim = state_dim
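The `Actor_MIP` docstring states the core MIP-DQN step: embed the trained critic as mixed-integer constraints and solve exactly for the action that maximizes Q(s, a). Given the imports at the top of this file (OmltBlock, ReluBigMFormulation, the ONNX helpers, gurobipy), a minimal sketch of that pattern follows; the ONNX path, input layout, and function name are illustrative assumptions, and the repo's real version also adds environment constraints when `constrain_on` is set:

    import pyomo.environ as pyo
    from omlt import OmltBlock
    from omlt.neuralnet import ReluBigMFormulation
    from omlt.io.onnx import load_onnx_neural_network_with_bounds

    def solve_best_action(state, state_dim, action_dim, onnx_path='critic.onnx'):
        # Embed the Q-network (exported earlier with write_onnx_model_with_bounds)
        # as a big-M MILP, fix the state inputs, and maximize the Q output.
        net = load_onnx_neural_network_with_bounds(onnx_path)
        model = pyo.ConcreteModel()
        model.nn = OmltBlock()
        model.nn.build_formulation(ReluBigMFormulation(net))
        for i in range(state_dim):                    # the critic takes one concatenated
            model.nn.inputs[i].fix(float(state[i]))   # (state, action) vector, cf. forward(self, value)
        model.obj = pyo.Objective(expr=model.nn.outputs[0], sense=pyo.maximize)
        pyo.SolverFactory('gurobi').solve(model)
        return [pyo.value(model.nn.inputs[state_dim + j]) for j in range(action_dim)]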
@@ -474,8 +457,6 @@ def predict_best_action(self, state):
         if i_episode % 10 == 0:
             # target_step
             with torch.no_grad():
-                # we choose to use decayed exploration to update other off policy algorithms here. We introduce
-                # extra parameters, which we dont know whether this would improve the performance or not?
                 agent._update_exploration_rate(args.explorate_decay,args.explorate_min)
                 trajectory = agent.explore_env(env, target_step)
                 steps, r_exp = update_buffer(trajectory)

random_generator_battery.py

Lines changed: 2 additions & 27 deletions
@@ -23,17 +23,14 @@ def add_pv_element(self,element):self.PV_Generation.append(element)
     def add_price_element(self,element):self.Prices.append(element)
     def add_electricity_element(self,element):self.Electricity_Consumption.append(element)

-    # get current time data based on given month day, and day_time
     def get_pv_data(self,month,day,day_time):return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
     def get_price_data(self,month,day,day_time):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
     def get_electricity_cons_data(self,month,day,day_time):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
-    # get series data for one episode
     def get_series_pv_data(self,month,day): return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
     def get_series_price_data(self,month,day):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
     def get_series_electricity_cons_data(self,month,day):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]

 class DG():
-    '''simulate a simple diesel generator here'''
     def __init__(self,parameters):
         self.name=parameters.keys()
         self.a_factor=parameters['a']
@@ -45,7 +42,6 @@ def __init__(self,parameters):
         self.ramping_down=parameters['ramping_down']
         self.last_step_output=None
     def step(self,action_gen):
-        ##god damn fuck, I forget to set each generator could be zero.
         output_change=action_gen*self.ramping_up# constrain the output_change with ramping up boundary
         output=self.current_output+output_change
         if output>0:
@@ -54,7 +50,6 @@ def step(self,action_gen):
             output=0
         self.current_output=output
     def _get_cost(self,output):
-        # here transfer mw parameters to kw parameters, avarage max cost per unit max [15,22]
         if output<=0:
             cost=0
         else:
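The `else:` branch of `_get_cost` is cut off by the hunk. Given the `a`/`b`/`c` coefficients read in `DG.__init__`, it presumably evaluates the usual quadratic fuel-cost curve; a sketch only (attribute names beyond `a_factor` are assumed, not taken from the diff):

    def _get_cost(self, output):
        # Quadratic diesel fuel cost; zero when the unit is off.
        if output <= 0:
            cost = 0
        else:
            cost = self.a_factor * output ** 2 + self.b_factor * output + self.c_factor
        return cost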
@@ -76,8 +71,6 @@ def __init__(self,parameters):
         self.max_discharge=parameters['max_discharge']# max discharge ability
         self.efficiency=parameters['efficiency']# charge and discharge efficiency
     def step(self,action_battery):
-        '''receive battery action, here is the action [-1,1] spaces and then update SOC with the constrains of charge/discharge, SOC boundaries'''
-        # max(min_state_value,min(max_state_value,s+action))
         energy=action_battery*self.max_charge
         updated_capacity=max(self.min_soc,min(self.max_soc,(self.current_capacity*self.capacity+energy)/self.capacity))
         self.energy_change=(updated_capacity-self.current_capacity)*self.capacity# if charge, positive, if discharge, negative
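Unrolled, the one-line SOC update above clamps the post-charge state of charge to [min_soc, max_soc] before computing the realized energy change. A worked example with illustrative numbers (none of these values come from the repo):

    capacity, max_charge = 500.0, 100.0   # kWh (example values)
    min_soc, max_soc = 0.2, 0.8
    current_capacity = 0.4                # SOC as a fraction of capacity
    action_battery = 0.5                  # action in [-1, 1]

    energy = action_battery * max_charge                          # +50 kWh requested
    raw_soc = (current_capacity * capacity + energy) / capacity   # (200 + 50) / 500 = 0.5
    updated_capacity = max(min_soc, min(max_soc, raw_soc))        # within bounds -> 0.5
    energy_change = (updated_capacity - current_capacity) * capacity  # +50 kWh charged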
@@ -105,7 +98,6 @@ def retrive_past_price(self):
             past_price=self.past_price# self.past price is fixed as the last days price
         else:
             past_price=self.price[24*(self.day-1):24*self.day]# get the price data of previous day
-        # print(past_price)
         for item in past_price[(self.time-24)::]:# here if current time_step is 10, then the 10th data of past price is extrated to the result as the first value
             result.append(item)
         for item in self.price[24*self.day:(24*self.day+self.time)]:# continue to retrive data from the past and attend it to the result. as past price is change everytime.
@@ -149,11 +141,6 @@ def __init__(self,**kwargs):
         self.DG1_max=self.dg1.power_output_max
         self.DG2_max=self.dg2.power_output_max
         self.DG3_max=self.dg3.power_output_max
-    @property
-    def netload(self):
-        '''get attributor of the class'''
-        # return self.demand-self.grid.wp_gen-self.grid.pv_gen
-        pass

     def reset(self):
         '''reset is used for initialize the environment, decide the day of month.'''
@@ -182,9 +169,7 @@ def _build_state(self):
         net_load=(electricity_demand-pv_generation)/self.Netload_max
         obs=np.concatenate((np.float32(time_step),np.float32(price),np.float32(soc),np.float32(net_load),np.float32(dg1_output),np.float32(dg2_output),np.float32(dg3_output)),axis=None)
         return obs
-    def _build_normalized_state(self):
-        '''maybe dont need to do this in here but just do this in data manager'''
-        pass
+
     def step(self,action):# state transition here current_obs--take_action--get reward-- get_finish--next_obs
         ## here we want to put take action into each components
         current_obs=self._build_state()
@@ -243,13 +228,6 @@ def step(self,action):# state transition here current_obs--take_action--get rewa
         if finish:
             self.final_step_outputs=final_step_outputs
             self.current_time=0
-            # self.day+=1
-            # if self.day>Constant.MONTHS_LEN[self.month-1]:
-            #     self.day=1
-            #     self.month+=1
-            # if self.month>12:
-            #     self.month=1
-            #     self.day=1
             next_obs=self.reset()

         else:
@@ -281,10 +259,7 @@ def _load_year_data(self):
             element=electricity[i:i+60]
             self.data_manager.add_electricity_element(sum(element)*300)
 ## test environment
-if __name__ == '__main__':
-    '''here we need a function that could validate
-    whether the current month, day and time could coordinate to sent data
-    8,December coordination of data is test from this way, that after 24 steps, we rechoose the month, day and reset current time= 0 '''
+if __name__ == '__main__':
     env=ESSEnv()
     env.TRAIN=False
     rewards=[]

random_generator_more_battery.py

Lines changed: 4 additions & 23 deletions
@@ -1,13 +1,9 @@

-import random
 import numpy as np
-
 import pandas as pd
 import gym
 from gym import spaces
-import math
-import os
-import sys
+
 from Parameters import battery_parameters,dg_parameters

 class Constant:
@@ -32,7 +28,6 @@ def get_series_pv_data(self,month,day): return self.PV_Generation[(sum(Constant.
     def get_series_price_data(self,month,day):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
     def get_series_electricity_cons_data(self,month,day):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
 class DG():
-    '''simulate a simple diesel generator here'''
     def __init__(self,parameters):
         self.name=parameters.keys()
         self.a_factor=parameters['a']
@@ -44,16 +39,14 @@ def __init__(self,parameters):
         self.ramping_down=parameters['ramping_down']
         self.last_step_output=None
     def step(self,action_gen):
-        ##god damn fuck, I forget to set each generator could be zero.
-        output_change=action_gen*self.ramping_up# constrain the output_change with ramping up boundary
+        output_change=action_gen*self.ramping_up#
         output=self.current_output+output_change
         if output>0:
             output=max(self.power_output_min,min(self.power_output_max,output))# meet the constrain
         else:
             output=0
         self.current_output=output
     def _get_cost(self,output):
-        # here transfer mw parameters to kw parameters, avarage max cost per unit max [15,22]
         if output<=0:
             cost=0
         else:
@@ -73,8 +66,7 @@ def __init__(self,parameters):
         self.max_discharge=parameters['max_discharge']# max discharge ability
         self.efficiency=parameters['efficiency']# charge and discharge efficiency
     def step(self,action_battery):
-        '''receive battery action, here is the action [-1,1] spaces and then update SOC with the constrains of charge/discharge, SOC boundaries'''
-        # max(min_state_value,min(max_state_value,s+action))
+
         energy=action_battery*self.max_charge
         updated_capacity=max(self.min_soc,min(self.max_soc,(self.current_capacity*self.capacity+energy)/self.capacity))
         self.energy_change=(updated_capacity-self.current_capacity)*self.capacity# if charge, positive, if discharge, negative
@@ -109,8 +101,6 @@ def retrive_past_price(self):
             result.append(item)
         return result
 class ESSEnv(gym.Env):
-    '''ENV descirption:
-    the agent learn to charge with low price and then discharge at high price, in this way, it could get benefits'''
     def __init__(self,**kwargs):
         super(ESSEnv,self).__init__()
         #parameters
@@ -148,7 +138,6 @@ def __init__(self,**kwargs):


     def reset(self):
-        '''reset is used for initialize the environment, decide the day of month.'''
         self.month=np.random.randint(1,13)# here we choose 12 month

         if self.TRAIN:
@@ -207,7 +196,6 @@ def step(self,action):# state transition here current_obs--take_action--get rewa
             sell_benefit=self.grid._get_cost(price,unbalance)*self.sell_coefficient #sell money to grid is little [0.029,0.1]
         else:
             sell_benefit=self.grid._get_cost(price,self.grid.exchange_ability)*self.sell_coefficient
-            #real unbalance that even grid could not meet
             self.excess=unbalance-self.grid.exchange_ability
             excess_penalty=self.excess*self.penalty_coefficient
     else:# unbalance <0, its load shedding model, in this case, deficient penalty is used
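The positive-unbalance branch above sells at most `exchange_ability` to the grid and penalizes the surplus beyond it. Restated as a standalone function (a sketch mirroring the diff's variable names, with `grid_cost` standing in for `self.grid._get_cost`; not the repo's code path):

    def settle_positive_unbalance(unbalance, exchange_ability, price,
                                  grid_cost, sell_coefficient, penalty_coefficient):
        # Sell what the grid can absorb; penalize what it cannot.
        sold = min(unbalance, exchange_ability)
        sell_benefit = grid_cost(price, sold) * sell_coefficient
        excess = max(unbalance - exchange_ability, 0.0)
        return sell_benefit, excess * penalty_coefficient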
@@ -231,7 +219,6 @@ def step(self,action):# state transition here current_obs--take_action--get rewa

         self.unbalance=unbalance
         self.real_unbalance=self.shedding+self.excess
-        '''here we also need to store the final step outputs for the final steps including, soc, output of units for seeing the final states'''
         final_step_outputs=[self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,self.battery1.current_capacity,self.battery2.current_capacity,self.battery3.current_capacity]
         self.current_time+=1
         finish=(self.current_time==self.episode_length)
@@ -244,7 +231,6 @@ def step(self,action):# state transition here current_obs--take_action--get rewa
             next_obs=self._build_state()
         return current_obs,next_obs,float(reward),finish
     def render(self, current_obs, next_obs, reward, finish):
-        # print('day={}'.format(self.day))
         print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.format(self.day,self.current_time, current_obs, next_obs, reward, finish))
     def _load_year_data(self):
         '''this private function is used to load the electricity consumption, pv generation and related prices in a year as
@@ -258,7 +244,6 @@ def _load_year_data(self):
         price=price_df['Price'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float)
         electricity=electricity_df['Power'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float)
         # netload=electricity-pv_data
-        '''we carefully redesign the magnitude for price and amount of generation as well as demand'''
         for element in pv_data:
             self.data_manager.add_pv_element(element*100)
         for element in price:
@@ -269,11 +254,7 @@ def _load_year_data(self):
         for i in range(0,electricity.shape[0],60):
             element=electricity[i:i+60]
             self.data_manager.add_electricity_element(sum(element)*300)
-## test environment
-if __name__ == '__main__':
-    '''here we need a function that could validate
-    whether the current month, day and time could coordinate to sent data
-    8,December coordination of data is test from this way, that after 24 steps, we rechoose the month, day and reset current time= 0 '''
+if __name__ == '__main__':
     env=ESSEnv()
     env.TRAIN=False
     rewards=[]
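Both files' `__main__` blocks are truncated at `rewards=[]` in this view. A typical smoke-test loop consistent with the `step` signature shown above (`current_obs, next_obs, reward, finish`) would continue along these lines, assuming the env defines a gym `action_space`:

    if __name__ == '__main__':
        env = ESSEnv()
        env.TRAIN = False
        rewards = []
        env.reset()
        finish = False
        while not finish:
            action = env.action_space.sample()  # random actions, just to exercise the dynamics
            current_obs, next_obs, reward, finish = env.step(action)
            env.render(current_obs, next_obs, reward, finish)
            rewards.append(reward)
        print('episode reward:', sum(rewards))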
