Commit e3cbff1f authored by hazrmard's avatar hazrmard

Merge branch 'AlumniControlV3'. Re-learning control.

parents 9da0be26 a127af5c
__pycache__
.ipynb_checkpoints
*.ini
*.pyc
.idea
.vscode
*.ini
*store.csv
is_valid.csv
SAT_Setpoint.csv
store
control
log.txt
test.txt
*.ipynb
*.out
relearn.pkl
*.hdf5
*.h5f
_demo_source
results*/
.data/
rl_results_local/
rl_perf_plot.py
modeltrainnb/
energyfreqanalysis/
scratch/
\ No newline at end of file
-1659.8888191133738
#importing the modules for reading the data and pre-processing it
from pandas import *
import numpy as np
from keras.models import load_model
#importing the modules for setting up the environment from which the
#algorithm learns the control policy to be implemented
# importing the modules for setting up the environment from which the
# algorithm learns the control policy to be implemented
import gym
from gym import spaces, logger
from gym import spaces
from gym.utils import seeding
#In this function we evaluate the energy consumption based on the
#controller-recommended optimized setpoint.
def costFn(self,inputVector,clusterNo):
#selecting the cost function model for the cluster to which the data point belongs
alpha = self.alpha[clusterNo]
samples = self.SV[clusterNo]
gamma = self.gamma[clusterNo]
intercept = self.intercept[clusterNo]
n = self.n[clusterNo]
#calculating the cost
count = [alpha[i]*rbfKernel(samples[i],inputVector,gamma) for i in range(n)]
cost = sum(count)
return cost+intercept[0]
#Used as part of the support vector regression model to evaluate the kernel function
def rbfKernel(x,inputVector,gamma):
dist = np.linalg.norm(x-inputVector)
kernel = np.exp(-gamma*((dist)**2))
return kernel
#The actor and critic networks, used to build the control policies, use
#normalized data. To map the output to the range of values needed for the
#building, we use this function
def boundsMapper(self,tanhoutput):
tanhdelta = 1 - (-1)
actualaction = self.lowth + (tanhoutput+1)*self.deltath/tanhdelta
return actualaction[0]
#This class describes the formal environment which the reinforcement learning
#interacts with. It inherits some properties from the gym imported earlier
# This class describes the formal environment which the reinforcement learning
# interacts with. It inherits some properties from the gym imported earlier
class Env(gym.Env):
def __init__(self):
def __init__(self, lstm_model, datapath: str = 'RL_relearn_data.pkl'):
#Here we initialize the data-driven models for evaluating energy for different
#types of weather conditions. The weights and biases of the models are stored in a file.
# descriptors for each cluster modelling energy consumption
self.alpha = []
self.gamma = [] # not to be confused w/ self.discount or gamma argument
self.intercept = []
self.n = []
self.SV = []
# loading clusters
for cluster in ['Cluster1', 'Cluster2', 'Cluster3', 'Cluster5', 'Cluster6']:
df = ExcelFile('SVReqns.xlsx').parse(cluster)
alpha = df['coefficients'].values.tolist()
self.alpha.append([float('%.2f' % elem) for elem in alpha ])
OAT = df['OAT'].values.tolist()
OAT = [float('%.2f' % elem) for elem in OAT ]
ORH = df['ORH'].values.tolist()
ORH = [float('%.2f' % elem) for elem in ORH ]
DHI = df['DHI'].values.tolist()
DHI = [float('%.2f' % elem) for elem in DHI ]
DT = df['DischargeTemp'].values.tolist()
DT = [ float('%.2f' % elem) for elem in DT ]
self.SV.append([np.array([a,b,c,d]) for a,b,c,d in zip(OAT,ORH,DHI,DT)])
intercept = df['intercept'].values.tolist()
self.intercept.append([ float('%.2f' % elem) for elem in intercept])
self.gamma.append(1.0/len(OAT))
self.n.append(len(OAT))
#Reading the weather data for the simulation and doing some pre-processing steps
self.dataSet = read_pickle("AHU_data.pkl")
self.dataSet = self.dataSet.dropna()
self.dataSet.columns = range(6)
self.dataSet=self.dataSet.reindex(columns=[0,1,2,3,5,4])
self.Stats = self.dataSet.describe().iloc[[1,2],:].values
self.unnormalized = self.dataSet.drop([0,1,4],axis=1)
self.weather = self.dataSet.drop([0,1],axis=1)
#Storing the cluster centers for different clusters for which we have individual energy models
self.clCenters = [np.array([53.44,90.14,27]),\
np.array([49.87,35.36,15]),\
np.array([37.17,56.05,47]),\
np.array([75.61,38.86,83]),\
np.array([67.70,64.98,29])]
#Standard requirements for interfacing with Keras-RL's code
scaledObsvSpaceLB = [-2,-2,-2,-2]
scaledObsvSpaceUB = [2,2,8,2]
self.observation_space = spaces.Box(low=np.array(scaledObsvSpaceLB),\
high=np.array(scaledObsvSpaceUB),\
dtype=np.float32)
self.action_space = spaces.Box(low=np.array([48.98]), high=np.array([91.66]), dtype=np.float32)
#previously the action space was bounded to 50/80
'''Here we initialize the data driven model for evaluating energy
The weights and biases of the models are stored in a file'''
self.model = lstm_model
'''choosing the weights of the rewardfn'''
self.w1 = 0.6
self.w2 = 0.4
'''Reading the weather data + current discharge temperature data for the simulation and doing some pre-processing steps'''
self.rawdata = read_pickle(datapath) # 'OAT', 'OAH', 'Ghi', 'SAT', 'TotalE'
self.dataSet = self.rawdata.iloc[:, :-1] # removing last column: TotalE
self.m, self.n = self.dataSet.shape
# getting 0:mean 1:std 2:min 3:max--> array of shape (metric(4), number of cols(4))
self.Stats = self.dataSet.describe().iloc[[1, 2, 3, 7], :].to_numpy()
'''Windowed Stats: Assuming a window of 3 hours'''
self.win_len = 36 # we look at 3.0 hr data
self.windowMean = self.dataSet.rolling(self.win_len, min_periods=1).mean()['OAT']
self.windowMax = self.dataSet.rolling(self.win_len, min_periods=1).max()['OAT']
self.windowMin = self.dataSet.rolling(self.win_len, min_periods=1).min()['OAT']
'''Standard requirements for interfacing with Keras-RL's code'''
spacelb = [self.Stats[2, i] for i in range(self.n)]
spaceub = [self.Stats[3, i] for i in range(self.n)]
self.observation_space = spaces.Box(low=np.array(spacelb),
high=np.array(spaceub),
dtype=np.float32)
# self.action_space = spaces.Box(low=np.array([self.Stats[2,3]]), high=np.array([self.Stats[3,3]]),
# # dtype=np.float32)
self.action_space = spaces.Box(low=np.array([55.0]),
high=np.array([75.0]),
dtype=np.float32)
self.seed()
self.viewer = None
self.state = None
self.steps_beyond_done = None
#Setting counter and duration for an episode of the learning process
#in terms of the number of steps
self.counter = 1500
self.threshold = 3000
#Resetting the environment to its initial value
self.S = self.weather.iloc[[self.counter],:].values#Added to the control action generated
self.S_UN = self.unnormalized.iloc[[self.counter],:].values#Used for cluster assignment
self.dischargeTemp = self.dataSet.iloc[:,5]
self.state = self.S.reshape(4,)#!!!!!!!!!!!!!!
self.state[0] = (self.state[0]-self.Stats[0,2])/self.Stats[1,2]
self.state[1] = (self.state[1]-self.Stats[0,3])/self.Stats[1,3]
self.state[2] = (self.state[2]-self.Stats[0,4])/self.Stats[1,4]
self.state[3] = (self.state[3]-self.Stats[0,5])/self.Stats[1,5]
self.energyPlot = []
#parameters used to convert normalized data to non normalized values
self.highth = 91.66
self.lowth = 48.98
self.deltath = self.highth - self.lowth
# counter: counts the current step number in an episode
# episode length: dictates number of steps in an episode
# testing: whether the env is in the testing phase or not
# dataPtr: steps through the entire available data in a cycle; gets
# reset to 0 when the entire training data is used up
self.counter = 0
self.episodelength = int(10080 / 5)
self.testing = False
self.dataPtr = 0
# slicing data into train and test sequences
self.slicepoint = 0.85
self.traindatalimit = int(self.slicepoint * self.m)
self.testdatalimit = self.m
'''Resetting the environment to its initial value'''
self.S = self.dataSet.iloc[self.dataPtr, :].to_numpy()
self.state = self.S.flatten()
def testenv(self):
self.testing = True
self.dataPtr = self.traindatalimit
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self,controlAct):
def step(self, controlact):
"""
A normalized energy cost modelled for the building based on its current
state and the control action (degrees F).
"""
# Find cluster associated with current env state to model energy
# from taking action
clusterAssignment = [np.linalg.norm(self.S_UN-i) for i in self.clCenters]
clusterNo = clusterAssignment.index(min(clusterAssignment))
controlAct = boundsMapper(self,controlAct)
self.S[0,3] = controlAct
inputVector = self.S
energyCost = costFn(self,inputVector,clusterNo)
reward = 1*(1 - energyCost) + 0.01*(50 - np.abs(self.dischargeTemp.iloc[self.counter]-controlAct))
self.state = self.S.flatten()
oldenergy = self.costfn(self.state)
# update the state
controlact = controlact[0]
self.state[3] = controlact
rlenergy = self.costfn(self.state)
# ambient temperature based control
w_mean = self.windowMean.iloc[self.dataPtr]
w_max = self.windowMax.iloc[self.dataPtr]
w_min = self.windowMin.iloc[self.dataPtr]
# prevent cases where data is anomalous
ideal_dt = 68
if w_max != w_min:
# implementing the safety heuristics based on Darren's safety recommendation
if self.state[0]>70: # ie over the past few hours, temperature is over 70F -- hot weather
ideal_dt = 10*(2*w_mean-self.state[0]- w_min)/(w_max-w_min) + 56
elif self.state[0]<58: # ie over the past few hours, temperature is under 58F -- cold weather
ideal_dt = 3 * (2 * w_mean - self.state[0] - w_min) / (w_max - w_min) + 65
else: # ie over the past few hours, temperature is between 58F and 70F -- intermediate weather
ideal_dt = 8 * (2 * w_mean - self.state[0] - w_min) / (w_max - w_min) + 58
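# worked example (hypothetical numbers): in the hot-weather branch with w_mean=75, w_min=70,
# w_max=85 and current OAT self.state[0]=80, the term (2*75 - 80 - 70)/(85 - 70) is 0, so
# ideal_dt = 10*0 + 56 = 56F -- the heuristic pulls the ideal discharge temperature toward the low end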
penalty = np.abs(ideal_dt-controlact)
reward = -self.w1*rlenergy -self.w2*penalty
step_info = {}
if self.testing:
# Update callback info with new values
step_info = {'rl_energy': rlenergy,
'old_energy': oldenergy,
'oat': self.state[0],
'reward':float(reward),
'dat': controlact}
self.counter += 1
self.energyPlot.append(energyCost)
self.S = self.weather.iloc[[self.counter],:].values
self.S_UN = self.unnormalized.iloc[[self.counter],:].values
self.dataPtr += 1
# adjust proper indexing of sequential train and test data
if not self.testing:
if self.dataPtr > self.traindatalimit - 1:
self.dataPtr = 0
else:
if self.dataPtr > self.testdatalimit - 1:
self.dataPtr = self.traindatalimit
# see if episode has ended
done = False
if self.counter>self.threshold:
if self.counter>self.episodelength-1:
done = True
thefile = open('test.txt', 'w')#Used to save the energy values
for item in self.energyPlot:
thefile.write("%s\n" % item)
self.state = self.S.reshape(4,)
self.state[0] = (self.state[0]-self.Stats[0,2])/self.Stats[1,2]
self.state[1] = (self.state[1]-self.Stats[0,3])/self.Stats[1,3]
self.state[2] = (self.state[2]-self.Stats[0,4])/self.Stats[1,4]
self.state[3] = (self.state[3]-self.Stats[0,5])/self.Stats[1,5]
return self.state,reward,done,{}
#Resetting the state of the environment after a prespecified amount of time has passed
#This interval corresponds to the data that is available for that time period.
def reset(self):
self.S = self.weather.iloc[[self.counter],:].values
self.S_UN = self.unnormalized.iloc[[self.counter],:].values
self.dischargeTemp = self.dataSet.iloc[:,5]
self.state = self.S.reshape(4,)#!!!!!!!!!!!!!!
self.state[0] = (self.state[0]-self.Stats[0,2])/self.Stats[1,2]
self.state[1] = (self.state[1]-self.Stats[0,3])/self.Stats[1,3]
self.state[2] = (self.state[2]-self.Stats[0,4])/self.Stats[1,4]
self.state[3] = (self.state[3]-self.Stats[0,5])/self.Stats[1,5]
self.counter = 1500
self.energyPlot = []
self.steps_beyond_done = None
# proceed to the next state
self.S = self.dataSet.iloc[self.dataPtr, :].to_numpy()
self.state = self.S.flatten()
self.state[3] = controlact
return self.state, float(reward), done, step_info
return self.state
\ No newline at end of file
# Resetting the state of the environment after a pre-specified amount of time has passed
# This interval corresponds to the data that is available for that time period.
def reset(self):
self.S = self.dataSet.iloc[self.dataPtr, :].to_numpy()
self.state = self.S.flatten()
self.counter = 0
self.steps_beyond_done = None
return self.state
def costfn(self, inputvector):
# if using min max scaled LSTM
inputvector = np.divide(np.subtract(inputvector, self.Stats[2, :]),
np.subtract(self.Stats[3, :], self.Stats[2, :]))
inputvector = inputvector.reshape(1, 1, inputvector.shape[0]) # reshape to (batchsize, timesteps, features)
return self.model.predict(inputvector, batch_size=1)[0, 0]
......@@ -32,10 +32,10 @@ The environment is designed to adhere to the interface of [OpenAI Gym][3] librar
The environment models energy consumption in the building. The model is generated using [support vector regression (SVR)][4]. The energy is a function of 4 measurements:
* Ambient temperature: Outside air temperature
* Relative humidity: Amount of water in air
* Solar irradiance: Amount of sunlight falling in the area
* Setpoint: Control temperature set for the air-handling unit
* Ambient temperature: Outside air temperature (OAT)
* Relative humidity: Amount of water in air (RH)
* Solar irradiance: Amount of sunlight falling in the area (DHI)
* Setpoint: Control temperature set for the air-handling unit (SAT)
The model relies on a history of measurements of the system. The measurements are obtained from existing software monitoring Alumni Hall.
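For illustration, here is a hedged sketch of how such an RBF-kernel SVR model evaluates an energy cost, following the `rbfKernel`/`costFn` functions shown in the diff above. The support vectors, coefficients, and query point below are hypothetical; the real parameters come from `SVReqns.xlsx`.
```
import numpy as np

def rbf_kernel(x, input_vector, gamma):
    # squared-exponential similarity between a support vector and the query point
    return np.exp(-gamma * np.linalg.norm(x - input_vector) ** 2)

def svr_energy(input_vector, alphas, support_vectors, gamma, intercept):
    # kernel-weighted sum over all support vectors, plus the bias term
    return sum(a * rbf_kernel(sv, input_vector, gamma)
               for a, sv in zip(alphas, support_vectors)) + intercept

# hypothetical cluster with two support vectors over (OAT, RH, DHI, SAT)
svs = [np.array([53.4, 90.1, 27.0, 65.0]), np.array([49.9, 35.4, 15.0, 60.0])]
cost = svr_energy(np.array([55.0, 80.0, 20.0, 62.0]),
                  alphas=[0.5, -0.2], support_vectors=svs,
                  gamma=1.0 / len(svs), intercept=0.1)
```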
......@@ -47,13 +47,28 @@ The repository contains the following files:
* `agent.py`: Defines the `agent` and functions to train and test the agent.
* `HVAC_environment.py`: Defines the environment representing the HVAC system.
* `Building_Environment_RL_Demo.ipynb`: A jupyter notebook demoing the training and testing process.
* `ah_api.py`: Pulls the weekly building + solar data for relearning.
* `moderetrain.py`: Used to retrain the LSTM model on the new week's data.
* `helperfunctions.py`: Set of helper methods for reading and processing raw historical and relearning data.
* `Building_Environment_RL_Demo.ipynb`: A Jupyter notebook demoing the training and testing process, plus newly added test processes.
* `controller.py`: The main script that will run the control functionality.
* `AHU_data.pkl`: A database containing historical information on ambient temperature, relative humidity, solar irradiance and total building heating and cooling energy.
* `SVReqns.xlsx`: The Excel file used to store parameters of a regression model for predicting discharge temperature.
* `HVAC_data.xlsx`: Same information as `AHU_data.pkl` but in Excel format.
* `agent_weights_actor.h5f`, `agent_weights_critic.h5f`: The initial control policies to use.
* `solar_irradiance.py`: Pulls solar irradiation data using Solcast API.
* `*.slurm`: Files used for offline weekly training
* `weights.best.hdf5`: LSTM model for energy prediction. Scaled data in -> scaled data out
* `.lastreward`: Accumulated reward by the controller using the parameters in `agent_weights_*.h5f` files.
* `RL_relearn_data.pkl`: Over one year's worth of historical data going back from 22 October 2019. The format of the data is shown below:
| `Dates` | `OAT` (F) | `OAH` (%) | `Ghi` (W/m2)| `SAT` (F) | `TotalE` (kJ) |
|---------------------|-------------|-----------|-----------|---------------|---------------|
| 2019-05-01 00:10:00 | 73.28 | 71.87 | 53.753710 | 28.0 | 35.551946 |
* `OAT`: Outside air temperature
* `OAH`: Outside air humidity
* `Ghi`: Global solar irradiation at Nashville
* `SAT`: Temperature of air coming out of the Air Handling Unit
* `TotalE`: Total chilled-water and hot-water energy circulating through the entire building
To periodically train on data from Alumni Hall under the server implementation, replace the `datapath` string appropriately.
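A minimal sketch (assuming the column layout shown above and that `RL_relearn_data.pkl` is in the working directory) of loading this file and min-max scaling a state vector, mirroring what the environment code does:
```
from pandas import read_pickle
import numpy as np

raw = read_pickle('RL_relearn_data.pkl')    # columns: OAT, OAH, Ghi, SAT, TotalE
states = raw.iloc[:, :-1]                   # drop TotalE; the remaining 4 columns form the state
stats = states.describe().iloc[[1, 2, 3, 7], :].to_numpy()   # rows: mean, std, min, max

def scale(x):
    # min-max scale a raw state vector before feeding it to the LSTM energy model
    return np.divide(np.subtract(x, stats[2, :]), np.subtract(stats[3, :], stats[2, :]))

scaled = scale(states.iloc[0].to_numpy())
```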
## Deploying
......@@ -79,14 +94,19 @@ These reqirements can be installed using [Anaconda python distribution][6]. A vi
```
python 3.6
- tensorflow # neural networks
- keras-rl # reinforcement learning
- keras # high-level manipulation of neural networks
- numpy # mathematical operations
- matplotlib # plotting
- gym # environment models for reinforcement learning
- pandas # manipulating records (tables/rows)
- xlrd # reading excel files
- tensorflow # neural networks
- keras-rl # reinforcement learning
- keras # high-level manipulation of neural networks
- numpy # mathematical operations
- matplotlib # plotting
- gym # environment models for reinforcement learning
- pandas # manipulating records (tables/rows)
- xlrd # reading excel files
- joblib # loading trained models from scikit-learn
- beautifulsoup4 # used to parse api data from Niagara Platform
- scikit-learn # used for basic machine learning applications and data processing
- requests # used to request API data in python
- scipy # used to process raw signals
```
These packages are listed in `requirements.txt`. They can be installed with `pip`, the package manager bundled with Python:
......@@ -97,51 +117,95 @@ pip install -r requirements.txt
### Running script
#### Configuration
Some variables are hardcoded into the script. Change them in `controller.py`:
```
DEV_READ_DIR = './'
DEV_WRITE_DIR = './'
PROD_READ_DIR = '/app001/niagara/Niagara4.2/vykon/stations/VUWS/shared'
PROD_WRITE_DIR = '/app001/niagara/Niagara4.2/vykon/shared'
```
Then run:
```
python controller.py --help
```
This will output the help string:
```
usage: controller.py [-h] [--use_control USE_CONTROL] [--period PERIOD]
usage: controller.py [-h] [--use_control USE_CONTROL]
[--control_mean_window CONTROL_MEAN_WINDOW]
[--control_output_threshold CONTROL_OUTPUT_THRESHOLD]
[--control_input_threshold T T T T]
[--control_input_fallback F F F F] [--period PERIOD]
[--interval INTERVAL]
[--training_duration TRAINING_DURATION] [--demo]
[--logging {10,20,30,40,50}]
[read_from] [store_at] [save_to]
[--training_duration TRAINING_DURATION]
[--relearn_window RELEARN_WINDOW] [--demo] [--dev_server]
[--logging {10,20,30,40,50}] [--halt_on_error]
[read_from] [store_at] [save_to] [is_valid]
Run Alumni Hall controller in a loop.
positional arguments:
read_from File to read Niagra data from.
store_at File to store measurements to.
store_at Directory to store measurements to. Files are named:
YEAR-WEEK-store.csv
save_to File to write control setpoint to.
is_valid File to write the setpoint validity indicator to.
is_valid File to write the setpoint validity indicator
optional arguments:
-h, --help show this help message and exit
--use_control USE_CONTROL, -c USE_CONTROL
Use existing control policy from file (.h5f)
Prefix of *_[actor|critic].h5f parameters file to use.
--control_mean_window CONTROL_MEAN_WINDOW, -m CONTROL_MEAN_WINDOW
Window of past actions over which to average control.
--control_output_threshold CONTROL_OUTPUT_THRESHOLD
Maximum amount output can change between intervals.
--control_input_threshold T T T T
Maximum amount each input can change between intervals
to be considered normal.
--control_input_fallback F F F F
Default input values to infer control from if access
to valid source fails.
--period PERIOD, -p PERIOD
Time interval (s) for control loop application.
--interval INTERVAL, -i INTERVAL
Interval (periods) between re-learning new control
policy. Defaults to 1 week for default period.
policy.
--training_duration TRAINING_DURATION, -t TRAINING_DURATION
Number of steps to use when re-learning control.
--relearn_window RELEARN_WINDOW, -w RELEARN_WINDOW
Number of past weeks over which to re-learn control.
--demo, -d Run a demo with fake input and output streams.
--dev_server, -x Flag to indicate if it is a development server. This
changes read/write paths
--logging {10,20,30,40,50}, -l {10,20,30,40,50}
Logging level (50-critical, 40-error, 30-warning,
20-info, 10-debug).
--halt_on_error Stop controller loop when error occurs.
```
For example:
```
python controller.py -p 1 -i 3 --demo
python controller.py -p 1 -i 3 --demo --dev_server
```
Will start a demo where the control loop acts every second, and the policy is adapted every 3 seconds.
Will start a demo on the development server where the control loop acts every second, and the policy is adapted every 3 seconds.
To exit, press `Ctrl+C`.
#### Additional files created
The script creates/overwrites several files while running:
* `agent_weights_actor.h5f` and `agent_weights_critic.h5f`: The parameters used by the reinforcement learning agent to control the system.
* `log.txt`: The log file containing messages printed by the controller.
* `solcast_dhi.csv`: A csv file that caches weather forecasts in bulk so that frequent network calls are not made.
* `store_at`, `save_to`, `is_valid`: Plain text files containing the history of measurements, the control setpoint, and the setpoint validity status. The names of these files are specified as arguments to the script.
* `.lastreward`: The corresponding rewards accumulated by the agent using parameters in `agent_weights_*.h5f` files.
* `relearn.pkl`: Intermediate data format for the agent's environment when it is adapting control.
[1]: https://www.wikiwand.com/en/Reinforcement_learning
[2]: https://github.com/keras-rl/keras-rl
......
"""
Defines functions that construct various components of a reinforcement learning
agent.
agent
"""
from typing import List, Any
from logging import Logger
import numpy as np
from keras import backend as K
from keras.models import Sequential, Model
from keras.callbacks import Callback
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.layers import Dense, Activation, Flatten, Input, Concatenate, Multiply, Lambda, BatchNormalization
from keras.optimizers import Adam
from rl.agents import DDPGAgent
......@@ -21,7 +24,7 @@ def get_agent(env) -> DDPGAgent:
network and a critic network.
Args:
* `env`: An OpenAI `gym.Env` instance.
* `env`: An OpenAI `gym.Env` instance
Returns:
* a `DDPGAgent` instance.
......@@ -30,24 +33,36 @@ def get_agent(env) -> DDPGAgent:
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(16))
actor.add(Activation('relu'))
actor.add(Dense(16))
actor.add(Activation('relu'))
actor.add(Dense(16))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('tanh'))
range_action_input = 0.5*(env.action_space.high- env.action_space.low)
constantBias = 1
lowb = env.action_space.low
#actor = Flatten(input_shape=(1,) + env.observation_space.shape)(observation_input)
y = Flatten()(observation_input)
y = Dense(16)(y)
y = BatchNormalization()(y)
y = Activation('relu')(y)
y = Dense(16)(y)
y = BatchNormalization()(y)
y = Activation('relu')(y)
y = Dense(16)(y)
y = BatchNormalization()(y)
y = Activation('relu')(y)
y = Dense(nb_actions)(y)
y = Activation('tanh')(y)
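# scale the tanh output from [-1, 1] onto [action_low, action_high]: (x + 1) * 0.5 * (high - low) + low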
y = Lambda(lambda x: (x+K.constant(constantBias))*K.constant(range_action_input)+ K.constant(lowb))(y)
actor = Model(inputs=[observation_input], outputs=[y])
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
x = Dense(32)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
......@@ -55,10 +70,10 @@ def get_agent(env) -> DDPGAgent:
memory = SequentialMemory(limit=100000, window_length=1)