Commit 2abbe2f

clean again

1 parent 5db08d4 commit 2abbe2f

10 files changed

Lines changed: 72 additions & 70 deletions

.idea/.gitignore

Lines changed: 8 additions & 0 deletions

.idea/Optimal-Energy-System-Scheduling-Combining-Mixed-Integer-Programming-and-Deep-Reinforcement-Learning.iml

Lines changed: 12 additions & 0 deletions

.idea/deployment.xml

Lines changed: 21 additions & 0 deletions

.idea/inspectionProfiles/profiles_settings.xml

Lines changed: 6 additions & 0 deletions

.idea/misc.xml

Lines changed: 4 additions & 0 deletions

.idea/modules.xml

Lines changed: 8 additions & 0 deletions

.idea/vcs.xml

Lines changed: 6 additions & 0 deletions

(The seven .idea/ files above are IDE-generated and are not rendered by default.)

MIP_DQN.py

Lines changed: 1 addition & 20 deletions
@@ -7,8 +7,7 @@
 import pyomo.environ as pyo
 import pyomo.kernel as pmo
 from omlt import OmltBlock
-import gurobipy as gp
-from gurobipy import GRB
+
 from gurobipy import *
 from omlt.neuralnet import NetworkDefinition, FullSpaceNNFormulation,ReluBigMFormulation
 from omlt.io.onnx import write_onnx_model_with_bounds,load_onnx_neural_network_with_bounds
@@ -18,7 +17,6 @@
 from copy import deepcopy
 import wandb
 from random_generator_battery import ESSEnv
-from tools import test_one_episode
 ## define net
 class ReplayBuffer:
     def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
@@ -58,9 +56,6 @@ def extend_buffer(self, state, other): # CPU array to CPU array
         self.next_idx = next_idx

     def sample_batch(self, batch_size) -> tuple:
-        '''get reward, mask, action, state, next_state,
-        actually, next_state is calculated based on state_indice,
-        we need to randomly choose more blocks, instead of justing random choose state'''
         indices = rd.randint(self.now_len - 1, size=batch_size)
         r_m_a = self.buf_other[indices]
         return (r_m_a[:, 0:1],
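The deleted docstring described what sample_batch returns, and the hunk cuts the return statement short. For reference, `buf_other` packs (reward, mask, action) per transition, and the standard completion of this buffer design pairs each sampled index with the stored state at index + 1; a sketch (the `buf_state` name and the tail of the tuple are assumptions, not lines from this diff):

    def sample_batch(self, batch_size) -> tuple:
        # Uniform random sampling; indices stop at now_len - 1 so that
        # index + 1 is always a valid next-state lookup.
        indices = rd.randint(self.now_len - 1, size=batch_size)
        r_m_a = self.buf_other[indices]
        return (r_m_a[:, 0:1],                # reward
                r_m_a[:, 1:2],                # mask, typically (1 - done) * gamma
                r_m_a[:, 2:],                 # action
                self.buf_state[indices],      # state
                self.buf_state[indices + 1])  # next state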
@@ -72,23 +67,19 @@ def sample_batch(self, batch_size) -> tuple:
     def update_now_len(self):
         self.now_len = self.max_len if self.if_full else self.next_idx
 class Arguments:
-    '''revise here for our own purpose'''
     def __init__(self, agent=None, env=None):

         self.agent = agent # Deep Reinforcement Learning algorithm
         self.env = env # the environment for training
-        self.plot_shadow_on=False# control do we need to plot all shadow figures
         self.cwd = None # current work directory. None means set automatically
         self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
-        # self.replace_train_data=True
         self.visible_gpu = '0,1,2,3' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
         self.worker_num = 2 # rollout workers number pre GPU (adjust it to get high GPU usage)
         self.num_threads = 8 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)

         '''Arguments for training'''
         self.num_episode=3000
         self.gamma = 0.995 # discount factor of future rewards
-        # self.reward_scale = 1 # an approximate target reward usually be closed to 256
         self.learning_rate = 1e-4 # 2 ** -14 ~= 6e-5
         self.soft_update_tau = 1e-2 # 2 ** -8 ~= 5e-3

@@ -97,7 +88,6 @@ def __init__(self, agent=None, env=None):
         self.repeat_times = 2 ** 3 # repeatedly update network to keep critic's loss small
         self.target_step = 1000 # collect target_step experiences, then update network, 1024
         self.max_memo = 50000 # capacity of replay buffer
-        self.if_per_or_gae = False # PER for off-policy sparse reward: Prioritized Experience Replay.
         ## arguments for controlling exploration
         self.explorate_decay=0.99
         self.explorate_min=0.3
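`explorate_decay` and `explorate_min` drive the decayed exploration invoked later in this file via `agent._update_exploration_rate(args.explorate_decay, args.explorate_min)`. The method itself is outside this diff; a minimal sketch of what such an update usually does, assuming the agent stores its current rate in an `explore_rate` attribute:

    def _update_exploration_rate(self, explorate_decay, explorate_min):
        # Geometric decay with a floor: 0.99x per call, never below 0.3
        # with the defaults above. (Sketch; not the repo's exact code.)
        self.explore_rate = max(self.explore_rate * explorate_decay, explorate_min)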
@@ -152,8 +142,6 @@ def __init__(self,mid_dim,state_dim,action_dim):
         self.net_q2=nn.Sequential(nn.Linear(mid_dim,mid_dim),nn.ReLU(),
                                   nn.Linear(mid_dim,1))# we get q2 value
     def forward(self,value):
-        '''we change state,action to value because when we use this part to build our MIP formulation and
-        omlt could not directly build with two inputs'''
         mid=self.net_head(value)
         return self.net_q1(mid)
     def get_q1_q2(self,value):
@@ -172,7 +160,6 @@ def __init__(self):
         self.criterion = torch.nn.SmoothL1Loss()

     def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
-        # explict call self.init() for multiprocessing
         self.device = torch.device(
             f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
         self.action_dim = action_dim
@@ -275,7 +262,6 @@ def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
         q1, q2 = self.cri.get_q1_q2(torch.cat((state, action),dim=-1))
         obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label) # twin critics
         return obj_critic, state
-        # set replay buffer



@@ -310,13 +296,10 @@ def get_episode_return(env, act, device):
         if done:
             break
     return episode_return,episode_unbalance,episode_operation_cost
-## define MIP
 class Actor_MIP:
     '''this actor is used to get the best action and Q function, the only input should be batch tensor state, action, and network, while the output should be
     batch tensor max_action, batch tensor max_Q'''
     def __init__(self,scaled_parameters,batch_size,net,state_dim,action_dim,env,constrain_on=False):
-        # self.input_batch_state = input_batch_state
-        # self.input_batch_action = input_batch_action
         self.batch_size = batch_size
         self.net = net
         self.state_dim = state_dim
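The `Actor_MIP` docstring states the core MIP-DQN step: embed the trained critic as mixed-integer constraints and solve exactly for the action that maximizes Q(s, a). Given the imports at the top of this file (OmltBlock, ReluBigMFormulation, the ONNX helpers, gurobipy), a minimal sketch of that pattern follows; the ONNX path, input layout, and function name are illustrative assumptions, and the repo's real version also adds environment constraints when `constrain_on` is set:

    import pyomo.environ as pyo
    from omlt import OmltBlock
    from omlt.neuralnet import ReluBigMFormulation
    from omlt.io.onnx import load_onnx_neural_network_with_bounds

    def solve_best_action(state, state_dim, action_dim, onnx_path='critic.onnx'):
        # Embed the Q-network (exported earlier with write_onnx_model_with_bounds)
        # as a big-M MILP, fix the state inputs, and maximize the Q output.
        net = load_onnx_neural_network_with_bounds(onnx_path)
        model = pyo.ConcreteModel()
        model.nn = OmltBlock()
        model.nn.build_formulation(ReluBigMFormulation(net))
        for i in range(state_dim):                    # the critic takes one concatenated
            model.nn.inputs[i].fix(float(state[i]))   # (state, action) vector, cf. forward(self, value)
        model.obj = pyo.Objective(expr=model.nn.outputs[0], sense=pyo.maximize)
        pyo.SolverFactory('gurobi').solve(model)
        return [pyo.value(model.nn.inputs[state_dim + j]) for j in range(action_dim)]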
@@ -474,8 +457,6 @@ def predict_best_action(self, state):
         if i_episode % 10 == 0:
             # target_step
             with torch.no_grad():
-                # we choose to use decayed exploration to update other off policy algorithms here. We introduce
-                # extra parameters, which we dont know whether this would improve the performance or not?
                 agent._update_exploration_rate(args.explorate_decay,args.explorate_min)
                 trajectory = agent.explore_env(env, target_step)
                 steps, r_exp = update_buffer(trajectory)

random_generator_battery.py

Lines changed: 2 additions & 27 deletions
@@ -23,17 +23,14 @@ def add_pv_element(self,element):self.PV_Generation.append(element)
     def add_price_element(self,element):self.Prices.append(element)
     def add_electricity_element(self,element):self.Electricity_Consumption.append(element)

-    # get current time data based on given month day, and day_time
     def get_pv_data(self,month,day,day_time):return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
     def get_price_data(self,month,day,day_time):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
     def get_electricity_cons_data(self,month,day,day_time):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+day_time]
-    # get series data for one episode
     def get_series_pv_data(self,month,day): return self.PV_Generation[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
     def get_series_price_data(self,month,day):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
     def get_series_electricity_cons_data(self,month,day):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]

 class DG():
-    '''simulate a simple diesel generator here'''
     def __init__(self,parameters):
         self.name=parameters.keys()
         self.a_factor=parameters['a']
@@ -45,7 +42,6 @@ def __init__(self,parameters):
         self.ramping_down=parameters['ramping_down']
         self.last_step_output=None
     def step(self,action_gen):
-        ##god damn fuck, I forget to set each generator could be zero.
         output_change=action_gen*self.ramping_up# constrain the output_change with ramping up boundary
         output=self.current_output+output_change
         if output>0:
@@ -54,7 +50,6 @@ def step(self,action_gen):
             output=0
         self.current_output=output
     def _get_cost(self,output):
-        # here transfer mw parameters to kw parameters, avarage max cost per unit max [15,22]
         if output<=0:
             cost=0
         else:
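The `else:` branch of `_get_cost` is cut off by the hunk. Given the `a`/`b`/`c` coefficients read in `DG.__init__`, it presumably evaluates the usual quadratic fuel-cost curve; a sketch only (attribute names beyond `a_factor` are assumed, not taken from the diff):

    def _get_cost(self, output):
        # Quadratic diesel fuel cost; zero when the unit is off.
        if output <= 0:
            cost = 0
        else:
            cost = self.a_factor * output ** 2 + self.b_factor * output + self.c_factor
        return cost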
@@ -76,8 +71,6 @@ def __init__(self,parameters):
         self.max_discharge=parameters['max_discharge']# max discharge ability
         self.efficiency=parameters['efficiency']# charge and discharge efficiency
     def step(self,action_battery):
-        '''receive battery action, here is the action [-1,1] spaces and then update SOC with the constrains of charge/discharge, SOC boundaries'''
-        # max(min_state_value,min(max_state_value,s+action))
         energy=action_battery*self.max_charge
         updated_capacity=max(self.min_soc,min(self.max_soc,(self.current_capacity*self.capacity+energy)/self.capacity))
         self.energy_change=(updated_capacity-self.current_capacity)*self.capacity# if charge, positive, if discharge, negative
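Unrolled, the one-line SOC update above clamps the post-charge state of charge to [min_soc, max_soc] before computing the realized energy change. A worked example with illustrative numbers (none of these values come from the repo):

    capacity, max_charge = 500.0, 100.0   # kWh (example values)
    min_soc, max_soc = 0.2, 0.8
    current_capacity = 0.4                # SOC as a fraction of capacity
    action_battery = 0.5                  # action in [-1, 1]

    energy = action_battery * max_charge                          # +50 kWh requested
    raw_soc = (current_capacity * capacity + energy) / capacity   # (200 + 50) / 500 = 0.5
    updated_capacity = max(min_soc, min(max_soc, raw_soc))        # within bounds -> 0.5
    energy_change = (updated_capacity - current_capacity) * capacity  # +50 kWh charged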
@@ -105,7 +98,6 @@ def retrive_past_price(self):
             past_price=self.past_price# self.past price is fixed as the last days price
         else:
             past_price=self.price[24*(self.day-1):24*self.day]# get the price data of previous day
-        # print(past_price)
         for item in past_price[(self.time-24)::]:# here if current time_step is 10, then the 10th data of past price is extrated to the result as the first value
             result.append(item)
         for item in self.price[24*self.day:(24*self.day+self.time)]:# continue to retrive data from the past and attend it to the result. as past price is change everytime.
@@ -149,11 +141,6 @@ def __init__(self,**kwargs):
         self.DG1_max=self.dg1.power_output_max
         self.DG2_max=self.dg2.power_output_max
         self.DG3_max=self.dg3.power_output_max
-    @property
-    def netload(self):
-        '''get attributor of the class'''
-        # return self.demand-self.grid.wp_gen-self.grid.pv_gen
-        pass

     def reset(self):
         '''reset is used for initialize the environment, decide the day of month.'''
@@ -182,9 +169,7 @@ def _build_state(self):
         net_load=(electricity_demand-pv_generation)/self.Netload_max
         obs=np.concatenate((np.float32(time_step),np.float32(price),np.float32(soc),np.float32(net_load),np.float32(dg1_output),np.float32(dg2_output),np.float32(dg3_output)),axis=None)
         return obs
-    def _build_normalized_state(self):
-        '''maybe dont need to do this in here but just do this in data manager'''
-        pass
+
     def step(self,action):# state transition here current_obs--take_action--get reward-- get_finish--next_obs
         ## here we want to put take action into each components
         current_obs=self._build_state()
@@ -243,13 +228,6 @@ def step(self,action):# state transition here current_obs--take_action--get rewa
         if finish:
             self.final_step_outputs=final_step_outputs
             self.current_time=0
-            # self.day+=1
-            # if self.day>Constant.MONTHS_LEN[self.month-1]:
-            #     self.day=1
-            #     self.month+=1
-            # if self.month>12:
-            #     self.month=1
-            #     self.day=1
             next_obs=self.reset()

         else:
@@ -281,10 +259,7 @@ def _load_year_data(self):
             element=electricity[i:i+60]
             self.data_manager.add_electricity_element(sum(element)*300)
 ## test environment
-if __name__ == '__main__':
-    '''here we need a function that could validate
-    whether the current month, day and time could coordinate to sent data
-    8,December coordination of data is test from this way, that after 24 steps, we rechoose the month, day and reset current time= 0 '''
+if __name__ == '__main__':
     env=ESSEnv()
     env.TRAIN=False
     rewards=[]

random_generator_more_battery.py

Lines changed: 4 additions & 23 deletions
@@ -1,13 +1,9 @@

-import random
 import numpy as np
-
 import pandas as pd
 import gym
 from gym import spaces
-import math
-import os
-import sys
+
 from Parameters import battery_parameters,dg_parameters

 class Constant:
@@ -32,7 +28,6 @@ def get_series_pv_data(self,month,day): return self.PV_Generation[(sum(Constant.
     def get_series_price_data(self,month,day):return self.Prices[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
     def get_series_electricity_cons_data(self,month,day):return self.Electricity_Consumption[(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24:(sum(Constant.MONTHS_LEN[:month-1])+day-1)*24+24]
 class DG():
-    '''simulate a simple diesel generator here'''
     def __init__(self,parameters):
         self.name=parameters.keys()
         self.a_factor=parameters['a']
@@ -44,16 +39,14 @@ def __init__(self,parameters):
         self.ramping_down=parameters['ramping_down']
         self.last_step_output=None
     def step(self,action_gen):
-        ##god damn fuck, I forget to set each generator could be zero.
-        output_change=action_gen*self.ramping_up# constrain the output_change with ramping up boundary
+        output_change=action_gen*self.ramping_up#
         output=self.current_output+output_change
         if output>0:
             output=max(self.power_output_min,min(self.power_output_max,output))# meet the constrain
         else:
             output=0
         self.current_output=output
     def _get_cost(self,output):
-        # here transfer mw parameters to kw parameters, avarage max cost per unit max [15,22]
         if output<=0:
             cost=0
         else:
@@ -73,8 +66,7 @@ def __init__(self,parameters):
         self.max_discharge=parameters['max_discharge']# max discharge ability
         self.efficiency=parameters['efficiency']# charge and discharge efficiency
     def step(self,action_battery):
-        '''receive battery action, here is the action [-1,1] spaces and then update SOC with the constrains of charge/discharge, SOC boundaries'''
-        # max(min_state_value,min(max_state_value,s+action))
+
         energy=action_battery*self.max_charge
         updated_capacity=max(self.min_soc,min(self.max_soc,(self.current_capacity*self.capacity+energy)/self.capacity))
         self.energy_change=(updated_capacity-self.current_capacity)*self.capacity# if charge, positive, if discharge, negative
@@ -109,8 +101,6 @@ def retrive_past_price(self):
             result.append(item)
         return result
 class ESSEnv(gym.Env):
-    '''ENV descirption:
-    the agent learn to charge with low price and then discharge at high price, in this way, it could get benefits'''
     def __init__(self,**kwargs):
         super(ESSEnv,self).__init__()
         #parameters
@@ -148,7 +138,6 @@ def __init__(self,**kwargs):


     def reset(self):
-        '''reset is used for initialize the environment, decide the day of month.'''
         self.month=np.random.randint(1,13)# here we choose 12 month

         if self.TRAIN:
@@ -207,7 +196,6 @@ def step(self,action):# state transition here current_obs--take_action--get rewa
             sell_benefit=self.grid._get_cost(price,unbalance)*self.sell_coefficient #sell money to grid is little [0.029,0.1]
         else:
             sell_benefit=self.grid._get_cost(price,self.grid.exchange_ability)*self.sell_coefficient
-            #real unbalance that even grid could not meet
             self.excess=unbalance-self.grid.exchange_ability
             excess_penalty=self.excess*self.penalty_coefficient
     else:# unbalance <0, its load shedding model, in this case, deficient penalty is used
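The positive-unbalance branch above sells at most `exchange_ability` to the grid and penalizes the surplus beyond it. Restated as a standalone function (a sketch mirroring the diff's variable names, with `grid_cost` standing in for `self.grid._get_cost`; not the repo's code path):

    def settle_positive_unbalance(unbalance, exchange_ability, price,
                                  grid_cost, sell_coefficient, penalty_coefficient):
        # Sell what the grid can absorb; penalize what it cannot.
        sold = min(unbalance, exchange_ability)
        sell_benefit = grid_cost(price, sold) * sell_coefficient
        excess = max(unbalance - exchange_ability, 0.0)
        return sell_benefit, excess * penalty_coefficient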
@@ -231,7 +219,6 @@ def step(self,action):# state transition here current_obs--take_action--get rewa

         self.unbalance=unbalance
         self.real_unbalance=self.shedding+self.excess
-        '''here we also need to store the final step outputs for the final steps including, soc, output of units for seeing the final states'''
         final_step_outputs=[self.dg1.current_output,self.dg2.current_output,self.dg3.current_output,self.battery1.current_capacity,self.battery2.current_capacity,self.battery3.current_capacity]
         self.current_time+=1
         finish=(self.current_time==self.episode_length)
@@ -244,7 +231,6 @@ def step(self,action):# state transition here current_obs--take_action--get rewa
             next_obs=self._build_state()
         return current_obs,next_obs,float(reward),finish
     def render(self, current_obs, next_obs, reward, finish):
-        # print('day={}'.format(self.day))
         print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.format(self.day,self.current_time, current_obs, next_obs, reward, finish))
     def _load_year_data(self):
         '''this private function is used to load the electricity consumption, pv generation and related prices in a year as
@@ -258,7 +244,6 @@ def _load_year_data(self):
         price=price_df['Price'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float)
         electricity=electricity_df['Power'].apply(lambda x:x.replace(',','.')).to_numpy(dtype=float)
         # netload=electricity-pv_data
-        '''we carefully redesign the magnitude for price and amount of generation as well as demand'''
         for element in pv_data:
             self.data_manager.add_pv_element(element*100)
         for element in price:
@@ -269,11 +254,7 @@ def _load_year_data(self):
         for i in range(0,electricity.shape[0],60):
             element=electricity[i:i+60]
             self.data_manager.add_electricity_element(sum(element)*300)
-## test environment
-if __name__ == '__main__':
-    '''here we need a function that could validate
-    whether the current month, day and time could coordinate to sent data
-    8,December coordination of data is test from this way, that after 24 steps, we rechoose the month, day and reset current time= 0 '''
+if __name__ == '__main__':
     env=ESSEnv()
     env.TRAIN=False
     rewards=[]
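Both files' `__main__` blocks are truncated at `rewards=[]` in this view. A typical smoke-test loop consistent with the `step` signature shown above (`current_obs, next_obs, reward, finish`) would continue along these lines, assuming the env defines a gym `action_space`:

    if __name__ == '__main__':
        env = ESSEnv()
        env.TRAIN = False
        rewards = []
        env.reset()
        finish = False
        while not finish:
            action = env.action_space.sample()  # random actions, just to exercise the dynamics
            current_obs, next_obs, reward, finish = env.step(action)
            env.render(current_obs, next_obs, reward, finish)
            rewards.append(reward)
        print('episode reward:', sum(rewards))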
