Commit f687bdf2 authored by Avisek Naug's avatar Avisek Naug 🎨
Browse files

remove redundant steps

parent 87b48d46
Pipeline #311 failed with stages
in 0 seconds
%% Cell type:markdown id: tags:
# Read data on the Alumni Hall variables
* Read the data
* Remove outliers
* Remove extremely sparse data points
* NB: `period` is counted in 5-minute intervals. A period of 6 means a time gap of 30 minutes; similarly, a period of 12 means a time gap of 1 hour.
%% Cell type:markdown id: tags:
## 1. Preprocessing Data
%% Cell type:markdown id: tags:
### Import modules
%% Cell type:code id: tags:
``` python
from helperfunctions import *
from dataGenerator import *
from PredictionModel import *
period = 12  # sampling period in 5-minute steps: period * 5 minutes, e.g. 12 * 5 = 60 minutes
```
%% Cell type:markdown id: tags:
### Read datafiles
%% Cell type:code id: tags:
``` python
# Each source lives in its own folder and uses its own timestamp column and
# timestamp format; fileReader returns one list of frames per source.

# Alumni Hall building variables.
datadirectory, datecolumn_name = 'AlumniHallPythonVariables/', "Date / Time"
dflist1 = fileReader(datadirectory, datecolumn_name, format='%m/%d/%Y %H:%M')

# ORH files.
datadirectory, datecolumn_name = 'ORH/', "Date"
dflist2 = fileReader(datadirectory, datecolumn_name, format='%m/%d/%Y %H:%M')

# Solar data: judging from the data the timestamps are in GMT/UTC, so an
# offset of -6 hours is applied while reading.
datadirectory, datecolumn_name = 'SolarData/', "PeriodEnd"
dflist3 = fileReader(
    datadirectory,
    datecolumn_name,
    format='%Y-%m-%dT%H:%M:%SZ',
    offset=-6,
)

# Room temperature files.
datadirectory, datecolumn_name = 'RoomTemp/', "Date"
dflist4 = fileReader(datadirectory, datecolumn_name, format='%m/%d/%Y %H:%M')
```
%% Cell type:markdown id: tags:
### Merge Along rows
%% Cell type:code id: tags:
``` python
# Collapse each source's list of frames into a single frame, one per source.
df1, df2, df3, df4 = [
    merge_df_rows(frames) for frames in (dflist1, dflist2, dflist3, dflist4)
]
```
%% Cell type:markdown id: tags:
### Remove Outliers from desired column
%% Cell type:code id: tags:
``` python
# Filter df1 using the CHW BTU-meter delta-reading column as the outlier
# criterion column. removeOutliers is defined outside this notebook, so the
# exact rule it applies is not visible here.
df1 = removeOutliers(df1,'CHW_BTU_METER currentKbtuDeltaReading')
```
%% Cell type:markdown id: tags:
### Print Sparse Columns
%% Cell type:code id: tags:
``` python
# Report, for each source frame, the columns where more than a 0.9 fraction
# of the data is missing.
sparseCols(df1,limit=0.9)  # more than 0.9 fraction of data missing
sparseCols(df2,limit=0.9)  # more than 0.9 fraction of data missing
sparseCols(df3,limit=0.9)  # more than 0.9 fraction of data missing
sparseCols(df4,limit=0.9)  # more than 0.9 fraction of data missing
```
%% Cell type:markdown id: tags:
### Remove Extremely Sparse Columns
%% Cell type:code id: tags:
``` python
# Drop the sparse columns from every source frame.
# NOTE(review): the threshold here is 0.1 while the report cell uses 0.9 —
# confirm in helperfunctions how removeSparseCols interprets `limit`.
df1, df2, df3, df4 = [
    removeSparseCols(frame, limit=0.1) for frame in (df1, df2, df3, df4)
]
```
%% Cell type:markdown id: tags:
### Remove Missing Rows now
%% Cell type:code id: tags:
``` python
# With the sparse columns gone, run droprows on each of the four source frames.
df1, df2, df3, df4 = [droprows(frame) for frame in (df1, df2, df3, df4)]
```
%% Cell type:code id: tags:
``` python
# Show the shape of df4 after the cleaning steps above.
df4.shape
```
%% Cell type:markdown id: tags:
### Create DF which contains average of zones
%% Cell type:code id: tags:
``` python
# Replace df4 with the output of rowAverage, passing "AvgTemp" as the name.
# NOTE(review): presumably "AvgTemp" names the column holding the per-row
# average across zones — confirm in helperfunctions.
df4 = rowAverage(df4,"AvgTemp")
```
%% Cell type:markdown id: tags:
### Resample, in case some of the data is not available at the higher resolution
%% Cell type:code id: tags:
``` python
# Resample all four source frames with the same `period` so that they share
# one time resolution.
df1, df2, df3, df4 = [
    df_sample(frame, period=period) for frame in (df1, df2, df3, df4)
]
```
%% Cell type:markdown id: tags:
### Check column names
%% Cell type:code id: tags:
``` python
# List the column labels of every source frame, one frame per line.
print(df1.columns, df2.columns, df3.columns, df4.columns, sep='\n')
```
%% Cell type:markdown id: tags:
### Create TotalE column in df1
%% Cell type:code id: tags:
``` python
# TotalE is the element-wise sum of the CHW and HW BTU-meter delta readings.
df1['TotalE'] = df1['CHW_BTU_METER currentKbtuDeltaReading']+ df1['HW_BTU_METER currentKbtuDeltaReading']
```
%% Cell type:markdown id: tags:
### Merge Along Columns
%% Cell type:code id: tags:
``` python
# Join the four source frames column-wise, then pass the combined frame
# through droprows.
df = droprows(merge_df_columns([df1, df2, df3, df4]))
```
%% Cell type:markdown id: tags:
### Rename the column headers of the merged dataframe
%% Cell type:code id: tags:
``` python
# Inspect the merged frame's column labels before renaming them.
df.columns
```
%% Cell type:code id: tags:
``` python
#rename column names in df1
df.columns = ['OAT','SAT','CoolE','HeatE','TotalE', 'OAH', 'Ghi',
'Period', 'PeriodStart', 'AvgTemp']
```
%% Cell type:code id: tags:
``` python
# Confirm the new column labels.
df.columns
```
%% Cell type:markdown id: tags:
### Keep required columns
%% Cell type:code id: tags:
``` python
# Restrict df to the six columns named in col_list.
df = keepcols(df,col_list=['OAT','OAH','SAT','TotalE','Ghi','AvgTemp'])
```
%% Cell type:code id: tags:
``` python
# Check which columns remain after the selection.
df.columns
```
%% Cell type:markdown id: tags:
### Rearrange columns: OAT, OAH, AvgTemp, Ghi, SAT, TotalE
%% Cell type:code id: tags:
``` python
# Put the columns into a fixed order, with TotalE last.
# NOTE(review): presumably the data generator depends on this order
# (inputs first, target last) — confirm in dataGenerator.
desired_order = ['OAT', 'OAH', 'AvgTemp', 'Ghi', 'SAT', 'TotalE']
df = df.reindex(columns=desired_order)
```
%% Cell type:code id: tags:
``` python
# Preview the first rows of the reordered frame.
df.head()
```
%% Cell type:markdown id: tags:
### Save dataframe
%% Cell type:code id: tags:
``` python
# Save the prepared frame; the training section reads the same
# '1Hourdata.pkl' path back in.
dfsave(df,'1Hourdata.pkl')
```
%% Cell type:markdown id: tags:
## 2. Creating the Training Network
### The cells below should be copied into a single script if they need to run for a long time on the ACCRE cluster
%% Cell type:markdown id: tags:
### Train and Test Data generation
`
We are running training on contiguous sequences of the data
`
%% Cell type:code id: tags:
``` python
from helperfunctions import *
from dataGenerator import *
from PredictionModel import *
period = 12  # sampling period in 5-minute steps: period * 5 minutes, e.g. 12 * 5 = 60 minutes
```
%% Cell type:markdown id: tags:
### Create the ResultsPlot and loginfo directories if they do not exist
%% Cell type:code id: tags:
``` python
import os

# Make sure both output folders exist before training starts.
# exist_ok=True turns a re-run of this cell into a no-op instead of raising
# FileExistsError when the folder is already there.
os.makedirs('ResultsPlot', exist_ok=True)
os.makedirs('loginfo', exist_ok=True)
```
%% Cell type:markdown id: tags:
### Remove log files from previous model training runs
%% Cell type:code id: tags:
``` python
# Delete every entry in the 'loginfo' folder so that a new run starts with
# no files left over from earlier runs.
# os.listdir returns bare file names, so each one has to be joined with the
# folder it was listed from before it can be removed.
files = os.listdir('loginfo')
for f in files:
    os.remove(os.path.join('loginfo', f))
```
%% Cell type:code id: tags:
``` python
# Load the frame written to '1Hourdata.pkl' by the preprocessing section and
# show its summary statistics as a quick sanity check.
datapath = '1Hourdata.pkl'
df = read_pickle(datapath)
df.describe()
```
%% Cell type:markdown id: tags:
### Create train and test data
%% Cell type:code id: tags:
``` python
# Build the train/test inputs (X) and targets (y) from the saved frame.
# NOTE(review): presumably the five input features are the first five columns
# and the single output is TotalE; the meaning of lag=-1 is not visible
# here — confirm both in dataGenerator.
trains_X,trains_y,tests_X,tests_y = datageneration(datapath, period=period,lag=-1, inputfeatures = 5, outputfeatures=1)
```
%% Cell type:markdown id: tags:
### LSTM model
%% Cell type:code id: tags:
``` python
# This LSTM model is stateful, so it must be built with a batch size of 1.
model = LSTMmodel(batch_size=1)
# Train for a single epoch, passing the test data alongside the training data.
model.trainmodel(trains_X, trains_y, tests_X, tests_y, noepochs=1)
```
%% Cell type:code id: tags:
``` python
# Log the model's error on the training and test data; what is recorded and
# where is defined by logError, outside this notebook.
model.logError(trains_X,trains_y,tests_X,tests_y)
```
%% Cell type:code id: tags:
``` python
# Generate the result plots from the training and test targets.
# NOTE(review): presumably these are written to the ResultsPlot folder — confirm.
model.generatePlots(trains_y,tests_y)
```
%% Cell type:code id: tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment