Commit f687bdf2 authored by Avisek Naug's avatar Avisek Naug 🎨
Browse files

remove redundant steps

parent 87b48d46
Pipeline #311 failed with stages
in 0 seconds
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Read data on the Alumni Hall variables # Read data on the Alumni Hall variables
* Read the data * Read the data
* Remove outliers * Remove outliers
* Remove extremely sparse data points * Remove extremely sparse data points
* NB: `period` is counted in 5-minute intervals. A period of 6 implies a time gap of 30 min; similarly, a period of 12 implies a time gap of 1 hour. * NB: `period` is counted in 5-minute intervals. A period of 6 implies a time gap of 30 min; similarly, a period of 12 implies a time gap of 1 hour.
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## 1. Preprocessing Data ## 1. Preprocessing Data
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Import modules ### Import modules
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from helperfunctions import * from helperfunctions import *
from dataGenerator import * from dataGenerator import *
from PredictionModel import * from PredictionModel import *
period = 12 # i.e. period*5 minutes, e.g. 12*5 = 60 minutes period = 12 # i.e. period*5 minutes, e.g. 12*5 = 60 minutes
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Read datafiles ### Read datafiles
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
datadirectory = 'AlumniHallPythonVariables/' datadirectory = 'AlumniHallPythonVariables/'
datecolumn_name = "Date / Time" datecolumn_name = "Date / Time"
dflist1 = fileReader(datadirectory,datecolumn_name,format='%m/%d/%Y %H:%M') dflist1 = fileReader(datadirectory,datecolumn_name,format='%m/%d/%Y %H:%M')
datadirectory = 'ORH/' datadirectory = 'ORH/'
datecolumn_name = "Date" datecolumn_name = "Date"
dflist2 = fileReader(datadirectory,datecolumn_name,format='%m/%d/%Y %H:%M') dflist2 = fileReader(datadirectory,datecolumn_name,format='%m/%d/%Y %H:%M')
datadirectory = 'SolarData/' datadirectory = 'SolarData/'
datecolumn_name ="PeriodEnd" datecolumn_name ="PeriodEnd"
dflist3 = fileReader(datadirectory,datecolumn_name,format='%Y-%m-%dT%H:%M:%SZ',offset=-6) dflist3 = fileReader(datadirectory,datecolumn_name,format='%Y-%m-%dT%H:%M:%SZ',offset=-6)
#Subtracting offset 6 hours since looking at the data it can be inferred that it is in GMT/UTC #Subtracting offset 6 hours since looking at the data it can be inferred that it is in GMT/UTC
datadirectory = 'RoomTemp/' datadirectory = 'RoomTemp/'
datecolumn_name ="Date" datecolumn_name ="Date"
dflist4 = fileReader(datadirectory,datecolumn_name,format='%m/%d/%Y %H:%M') dflist4 = fileReader(datadirectory,datecolumn_name,format='%m/%d/%Y %H:%M')
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Merge Along rows ### Merge Along rows
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df1 = merge_df_rows(dflist1) df1 = merge_df_rows(dflist1)
df2 = merge_df_rows(dflist2) df2 = merge_df_rows(dflist2)
df3 = merge_df_rows(dflist3) df3 = merge_df_rows(dflist3)
df4 = merge_df_rows(dflist4) df4 = merge_df_rows(dflist4)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Remove Outliers from desired column ### Remove Outliers from desired column
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df1 = removeOutliers(df1,'CHW_BTU_METER currentKbtuDeltaReading') df1 = removeOutliers(df1,'CHW_BTU_METER currentKbtuDeltaReading')
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Print Sparse Columns ### Print Sparse Columns
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
sparseCols(df1,limit=0.9)#more than 0.9 fraction of data missing sparseCols(df1,limit=0.9)#more than 0.9 fraction of data missing
sparseCols(df2,limit=0.9)#more than 0.9 fraction of data missing sparseCols(df2,limit=0.9)#more than 0.9 fraction of data missing
sparseCols(df3,limit=0.9)#more than 0.9 fraction of data missing sparseCols(df3,limit=0.9)#more than 0.9 fraction of data missing
sparseCols(df4,limit=0.9)#more than 0.9 fraction of data missing sparseCols(df4,limit=0.9)#more than 0.9 fraction of data missing
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Remove Extremely Sparse Columns ### Remove Extremely Sparse Columns
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df1 = removeSparseCols(df1, limit=0.1) df1 = removeSparseCols(df1, limit=0.1)
df2 = removeSparseCols(df2, limit=0.1) df2 = removeSparseCols(df2, limit=0.1)
df3 = removeSparseCols(df3, limit=0.1) df3 = removeSparseCols(df3, limit=0.1)
df4 = removeSparseCols(df4, limit=0.1) df4 = removeSparseCols(df4, limit=0.1)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Remove Missing Rows now ### Remove Missing Rows now
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df1 = droprows(df1) df1 = droprows(df1)
df2 = droprows(df2) df2 = droprows(df2)
df3 = droprows(df3) df3 = droprows(df3)
df4 = droprows(df4) df4 = droprows(df4)
``` ```
%% Cell type:code id: tags:
``` python
df4.shape
```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Create DF which contains average of zones ### Create DF which contains average of zones
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df4 = rowAverage(df4,"AvgTemp") df4 = rowAverage(df4,"AvgTemp")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Resample: in case we don't have some data at higher resolution ### Resample: in case we don't have some data at higher resolution
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df1 = df_sample(df1,period=period) df1 = df_sample(df1,period=period)
df2 = df_sample(df2,period=period) df2 = df_sample(df2,period=period)
df3 = df_sample(df3,period=period) df3 = df_sample(df3,period=period)
df4 = df_sample(df4,period=period) df4 = df_sample(df4,period=period)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Check column names ### Check column names
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
print (df1.columns) print (df1.columns)
print (df2.columns) print (df2.columns)
print (df3.columns) print (df3.columns)
print (df4.columns) print (df4.columns)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Create TotalE column in df1 ### Create TotalE column in df1
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df1['TotalE'] = df1['CHW_BTU_METER currentKbtuDeltaReading']+ df1['HW_BTU_METER currentKbtuDeltaReading'] df1['TotalE'] = df1['CHW_BTU_METER currentKbtuDeltaReading']+ df1['HW_BTU_METER currentKbtuDeltaReading']
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Merge Along Columns ### Merge Along Columns
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df = merge_df_columns([df1,df2,df3,df4]) df = merge_df_columns([df1,df2,df3,df4])
df = droprows(df) df = droprows(df)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Rename column headers of the merged df ### Rename column headers of the merged df
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df.columns
```
%% Cell type:code id: tags:
``` python
#rename column names in df1 #rename column names in df1
df.columns = ['OAT','SAT','CoolE','HeatE','TotalE', 'OAH', 'Ghi', df.columns = ['OAT','SAT','CoolE','HeatE','TotalE', 'OAH', 'Ghi',
'Period', 'PeriodStart', 'AvgTemp'] 'Period', 'PeriodStart', 'AvgTemp']
``` ```
%% Cell type:code id: tags:
``` python
df.columns
```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Keep required columns ### Keep required columns
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
df = keepcols(df,col_list=['OAT','OAH','SAT','TotalE','Ghi','AvgTemp']) df = keepcols(df,col_list=['OAT','OAH','SAT','TotalE','Ghi','AvgTemp'])
``` ```
%% Cell type:code id: tags:
``` python
df.columns
```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Rearrange Columns: OAT; OAH; AvgTemp; Ghi; SAT; TotalE ### Rearrange Columns: OAT; OAH; AvgTemp; Ghi; SAT; TotalE
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
desired_order = ['OAT', 'OAH', 'AvgTemp', 'Ghi', 'SAT', 'TotalE'] desired_order = ['OAT', 'OAH', 'AvgTemp', 'Ghi', 'SAT', 'TotalE']
df = df.reindex(columns=desired_order) df = df.reindex(columns=desired_order)
``` ```
%% Cell type:code id: tags:
``` python
df.head()
```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Save dataframe ### Save dataframe
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
dfsave(df,'1Hourdata.pkl') dfsave(df,'1Hourdata.pkl')
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## 2. Creating the Training Network ## 2. Creating the Training Network
### The cells below should be copied into a single script if they need to be run for a long duration on the ACCRE cluster ### The cells below should be copied into a single script if they need to be run for a long duration on the ACCRE cluster
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Train and Test Data generation ### Train and Test Data generation
` `
We are running training on contiguous sequences of the data We are running training on contiguous sequences of the data
` `
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from helperfunctions import * from helperfunctions import *
from dataGenerator import * from dataGenerator import *
from PredictionModel import * from PredictionModel import *
period = 12 # i.e. period*5 minutes, e.g. 12*5 = 60 minutes period = 12 # i.e. period*5 minutes, e.g. 12*5 = 60 minutes
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Create the ResultsPlot and loginfo directories if they do not exist ### Create the ResultsPlot and loginfo directories if they do not exist
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import os

# Create the 'ResultsPlot' and 'loginfo' folders in the working directory.
# A folder that already exists is left untouched; any other OS error propagates.
for folder in ('ResultsPlot', 'loginfo'):
    try:
        os.mkdir(folder)
    except FileExistsError:
        pass
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Remove model training log files ### Remove model training log files
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Delete every entry found in the 'loginfo' directory.
# The previous version built the path as `yourFilePath + f`, but `yourFilePath`
# is never defined in this notebook, so the cell failed with a NameError.
# The names come from listing 'loginfo', so that directory is the correct prefix.
log_dir = 'loginfo'
files = os.listdir(log_dir)
for f in files:
    # os.listdir returns bare names, so join them back onto the directory
    os.remove(os.path.join(log_dir, f))
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
datapath = '1Hourdata.pkl' datapath = '1Hourdata.pkl'
df = read_pickle(datapath) df = read_pickle(datapath)
df.describe() df.describe()
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Create train and test data ### Create train and test data
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
trains_X,trains_y,tests_X,tests_y = datageneration(datapath, period=period,lag=-1, inputfeatures = 5, outputfeatures=1) trains_X,trains_y,tests_X,tests_y = datageneration(datapath, period=period,lag=-1, inputfeatures = 5, outputfeatures=1)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### LSTM model ### LSTM model
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
"""Since this SLTM model is stateful remember to make a batch size of 1 only""" """Since this SLTM model is stateful remember to make a batch size of 1 only"""
model = LSTMmodel(batch_size=1) model = LSTMmodel(batch_size=1)
model.trainmodel(trains_X,trains_y,tests_X,tests_y,noepochs=1) model.trainmodel(trains_X,trains_y,tests_X,tests_y,noepochs=1)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
model.logError(trains_X,trains_y,tests_X,tests_y) model.logError(trains_X,trains_y,tests_X,tests_y)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
model.generatePlots(trains_y,tests_y) model.generatePlots(trains_y,tests_y)
``` ```
%% Cell type:code id: tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment