In [65]:
import pandas as pd
In [66]:
df = pd.read_csv("dataset.csv", parse_dates=['Service End Date'], low_memory=False)
In [67]:
df.rename(columns={'Service End Date': 'Date'}, inplace=True)

Monthly Analysis¶


In [68]:
df['Month'] = df['Date'].dt.to_period('M')
monthly_avg = df.groupby('Month')['Consumption (KWH)'].mean().reset_index()
monthly_avg['Month'] = monthly_avg['Month'].dt.to_timestamp()
monthly_df = monthly_avg[['Month', 'Consumption (KWH)']]
monthly_df.columns = ['Month', 'Average_Consumption']
In [69]:
monthly_df.head()
Out[69]:
Month Average_Consumption
0 2010-01-01 45446.030782
1 2010-02-01 38824.279283
2 2010-03-01 36652.063380
3 2010-04-01 36823.726747
4 2010-05-01 36918.425453
In [70]:
monthly_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 2 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Month                159 non-null    datetime64[ns]
 1   Average_Consumption  159 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 2.6 KB
In [71]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_style("darkgrid")

plt.figure(figsize=(14, 5))
plt.plot(monthly_df['Month'], monthly_df['Average_Consumption'], linewidth=2, color='blue')
plt.title("Average Monthly Electricity Consumption (KWH)", fontsize=14)
plt.ylabel('Consumption (KWH)', fontsize=12)
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [72]:
monthly_df.sort_values(by='Average_Consumption', ascending=False).head(10)
Out[72]:
Month Average_Consumption
12 2011-01-01 150980.000000
6 2010-07-01 63708.211823
7 2010-08-01 61722.964228
19 2012-07-01 53037.934614
31 2013-07-01 48941.412091
5 2010-06-01 48028.773140
20 2012-08-01 48004.884395
8 2010-09-01 47695.361916
0 2010-01-01 45446.030782
56 2015-08-01 43139.023571
In [73]:
df['Year'] = df['Date'].dt.year
df.groupby('Year').size()
Out[73]:
Year
2010.0    29267
2011.0        1
2012.0    35207
2013.0    38534
2014.0    39838
2015.0    40499
2016.0    40792
2017.0    38482
2018.0        7
2019.0    38799
2020.0    54382
2021.0    41366
2022.0    41481
2023.0    41105
2024.0    41264
2025.0        3
dtype: int64
In [74]:
monthly_df = monthly_df[~monthly_df['Month'].dt.year.isin([2011, 2018, 2025])]
In [75]:
sns.set_style("darkgrid")

plt.figure(figsize=(14, 5))
plt.plot(monthly_df['Month'], monthly_df['Average_Consumption'], linewidth=2, color='blue')
plt.title("Average Monthly Electricity Consumption (KWH)", fontsize=14)
plt.ylabel('Consumption (KWH)', fontsize=12)
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image

Monthly Predictions¶


In [76]:
from prophet import Prophet
monthly_df.rename(columns={'Month': 'ds', 'Average_Consumption': 'y'}, inplace=True)
In [77]:
model = Prophet()
model.fit(monthly_df)
12:17:25 - cmdstanpy - INFO - Chain [1] start processing
12:17:25 - cmdstanpy - INFO - Chain [1] done processing
Out[77]:
<prophet.forecaster.Prophet at 0x18d35fffbe0>

EC 1 month in the future -¶

In [78]:
future_months = model.make_future_dataframe(periods=1, freq='M')
In [79]:
forecast = model.predict(future_months)
In [80]:
model.plot(forecast)
plt.title("Forecasted Electricity Consumption")
plt.xlabel("Date")
plt.ylabel("Consumption (KWH)")
plt.show()
No description has been provided for this image
In [81]:
future_predictions = forecast[forecast['ds'] > monthly_df['ds'].max()]
future_predictions = future_predictions[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
print(future_predictions)
            ds          yhat    yhat_lower    yhat_upper
156 2024-12-31  24853.644731  22396.671723  27159.548383

EC 6 months in the future -¶

In [82]:
future_months = model.make_future_dataframe(periods=6, freq='M')
In [83]:
forecast = model.predict(future_months) 
In [84]:
model.plot(forecast)
plt.title("Forecasted Electricity Consumption")
plt.xlabel("Date")
plt.ylabel("Consumption (KWH)")
plt.show()
No description has been provided for this image
In [85]:
future_predictions = forecast[forecast['ds'] > monthly_df['ds'].max()]
future_predictions = future_predictions[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
print(future_predictions)
            ds          yhat    yhat_lower    yhat_upper
156 2024-12-31  24853.644731  22358.960918  27292.681836
157 2025-01-31  22563.340186  20259.413055  24861.266598
158 2025-02-28  21609.572339  19167.911512  24139.242252
159 2025-03-31  21144.182921  18727.399242  23629.570751
160 2025-04-30  23317.980708  20749.479946  25851.228456
161 2025-05-31  25861.853085  23266.155612  28303.532404

EC 9 months in the future -¶

In [86]:
future_months = model.make_future_dataframe(periods=9, freq='M')
In [87]:
forecast = model.predict(future_months) 
In [88]:
model.plot(forecast)
plt.title("Forecasted Electricity Consumption")
plt.xlabel("Date")
plt.ylabel("Consumption (KWH)")
plt.show()
No description has been provided for this image
In [89]:
future_predictions = forecast[forecast['ds'] > monthly_df['ds'].max()]
future_predictions = future_predictions[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
print(future_predictions)
            ds          yhat    yhat_lower    yhat_upper
156 2024-12-31  24853.644731  22519.403477  27284.775834
157 2025-01-31  22563.340186  20033.318795  24976.815448
158 2025-02-28  21609.572339  19086.890279  23989.229733
159 2025-03-31  21144.182921  18698.058881  23582.192642
160 2025-04-30  23317.980708  20810.945199  25721.861811
161 2025-05-31  25861.853085  23290.034812  28333.393047
162 2025-06-30  35691.667091  33244.552557  38169.453350
163 2025-07-31  36500.110218  34248.294788  38947.663284
164 2025-08-31  32938.516848  30681.172682  35229.188894
In [ ]:
 

Yearly Analysis¶


In [90]:
df['Year'] = df['Date'].dt.to_period('Y')
yearly_total = df.groupby('Year')['Consumption (KWH)'].sum().reset_index()
yearly_total['Year'] = yearly_total['Year'].dt.to_timestamp()
yearly_df = yearly_total[['Year', 'Consumption (KWH)']]
yearly_df.columns = ['Year', 'Total_Consumption']
In [91]:
yearly_df.head()
Out[91]:
Year Total_Consumption
0 2010-01-01 1.305557e+09
1 2011-01-01 1.509800e+05
2 2012-01-01 1.279641e+09
3 2013-01-01 1.260168e+09
4 2014-01-01 1.222798e+09
In [92]:
yearly_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               16 non-null     datetime64[ns]
 1   Total_Consumption  16 non-null     float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 384.0 bytes
In [93]:
plt.figure(figsize=(14, 5))
plt.plot(yearly_df['Year'], yearly_df['Total_Consumption'], linewidth=2, color='blue')
plt.title("Yearly Electricity Consumption (KWH)", fontsize=14)
plt.ylabel('Consumption (KWH)', fontsize=12)
plt.xlabel('Year', fontsize=12)
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [94]:
yearly_df = yearly_df[~yearly_df['Year'].dt.year.isin([2011, 2018, 2025])]
In [95]:
sns.set_style("darkgrid")

plt.figure(figsize=(14, 5))
plt.plot(yearly_df['Year'], yearly_df['Total_Consumption'], linewidth=2, color='blue')
plt.title("Yearly Electricity Consumption (KWH)", fontsize=14)
plt.ylabel('Consumption (KWH)', fontsize=12)
plt.xlabel('Year', fontsize=12)
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image

Yearly Predictions¶


In [96]:
yearly_df.rename(columns={'Year': 'ds', 'Total_Consumption': 'y'}, inplace=True)
In [97]:
model = Prophet()
model.fit(yearly_df)
12:18:00 - cmdstanpy - INFO - Chain [1] start processing
12:18:00 - cmdstanpy - INFO - Chain [1] done processing
Out[97]:
<prophet.forecaster.Prophet at 0x18d3646ee80>

EC 1 year in the future -¶

In [98]:
future_years = model.make_future_dataframe(periods=1, freq='Y')
In [99]:
forecast = model.predict(future_years) 
In [100]:
model.plot(forecast)
plt.title("Forecasted Electricity Consumption")
plt.xlabel("Date")
plt.ylabel("Consumption (KWH)")
plt.show()
No description has been provided for this image
In [101]:
future_predictions = forecast[forecast['ds'] > yearly_df['ds'].max()]
In [102]:
future_predictions = future_predictions[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
print(future_predictions)
           ds          yhat    yhat_lower    yhat_upper
13 2024-12-31  1.247297e+09  1.128906e+09  1.374092e+09

EC 10 years in the future -¶

In [103]:
future_years = model.make_future_dataframe(periods=10, freq='Y')
In [104]:
forecast = model.predict(future_years)
In [105]:
model.plot(forecast)
plt.title("Forecasted Electricity Consumption")
plt.xlabel("Date")
plt.ylabel("Consumption (KWH)")
plt.show()
No description has been provided for this image
In [106]:
future_predictions = forecast[forecast['ds'] > yearly_df['ds'].max()]
In [107]:
future_predictions = future_predictions[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
print(future_predictions)
           ds          yhat    yhat_lower    yhat_upper
13 2024-12-31  1.247297e+09  1.115981e+09  1.369104e+09
14 2025-12-31  1.313726e+09  1.188932e+09  1.431386e+09
15 2026-12-31  1.394505e+09  1.270925e+09  1.530247e+09
16 2027-12-31  1.489491e+09  1.361138e+09  1.614006e+09
17 2028-12-31  1.198532e+09  1.074770e+09  1.323877e+09
18 2029-12-31  1.264961e+09  1.139831e+09  1.392290e+09
19 2030-12-31  1.345741e+09  1.221208e+09  1.475364e+09
20 2031-12-31  1.440726e+09  1.318782e+09  1.565513e+09
21 2032-12-31  1.149767e+09  1.015143e+09  1.273040e+09
22 2033-12-31  1.216197e+09  1.095937e+09  1.337940e+09

EC 20 years in the future -¶

In [108]:
future_years = model.make_future_dataframe(periods=20, freq='Y')
In [109]:
forecast = model.predict(future_years)
In [110]:
model.plot(forecast)
plt.title("Forecasted Electricity Consumption")
plt.xlabel("Date")
plt.ylabel("Consumption (KWH)")
plt.show()
No description has been provided for this image
In [111]:
future_predictions = forecast[forecast['ds'] > yearly_df['ds'].max()]
In [112]:
future_predictions = future_predictions[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
print(future_predictions)
           ds          yhat    yhat_lower    yhat_upper
13 2024-12-31  1.247297e+09  1.126593e+09  1.376678e+09
14 2025-12-31  1.313726e+09  1.191309e+09  1.437901e+09
15 2026-12-31  1.394505e+09  1.270946e+09  1.517047e+09
16 2027-12-31  1.489491e+09  1.371474e+09  1.616276e+09
17 2028-12-31  1.198532e+09  1.074151e+09  1.325576e+09
18 2029-12-31  1.264961e+09  1.136338e+09  1.380003e+09
19 2030-12-31  1.345741e+09  1.220270e+09  1.480249e+09
20 2031-12-31  1.440726e+09  1.316787e+09  1.566012e+09
21 2032-12-31  1.149767e+09  1.021266e+09  1.274194e+09
22 2033-12-31  1.216197e+09  1.096445e+09  1.333709e+09
23 2034-12-31  1.296976e+09  1.175606e+09  1.414110e+09
24 2035-12-31  1.391961e+09  1.270857e+09  1.520130e+09
25 2036-12-31  1.101002e+09  9.747827e+08  1.227193e+09
26 2037-12-31  1.167432e+09  1.031024e+09  1.292726e+09
27 2038-12-31  1.248211e+09  1.123664e+09  1.378456e+09
28 2039-12-31  1.343196e+09  1.220753e+09  1.464241e+09
29 2040-12-31  1.052238e+09  9.222902e+08  1.173694e+09
30 2041-12-31  1.118667e+09  9.902822e+08  1.239756e+09
31 2042-12-31  1.199446e+09  1.065370e+09  1.324158e+09
32 2043-12-31  1.294432e+09  1.173018e+09  1.416951e+09
In [ ]:
 

Tuning the Prophet Model¶


In [113]:
models = {}

Default -¶

In [50]:
m_default = Prophet()
m_default.fit(monthly_df)
future = m_default.make_future_dataframe(periods=9, freq='M')
forecast_default = m_default.predict(future)
models['Default'] = forecast_default
12:10:05 - cmdstanpy - INFO - Chain [1] start processing
12:10:05 - cmdstanpy - INFO - Chain [1] done processing

Tuning Growth -¶

In [51]:
monthly_df['cap'] = monthly_df['y'].max() * 1.2
future_growth = future.copy()
future_growth['cap'] = monthly_df['cap'].iloc[0]
m_growth = Prophet(growth='logistic')
m_growth.fit(monthly_df)
forecast_growth = m_growth.predict(future_growth)
models['Growth (logistic)'] = forecast_growth
12:10:06 - cmdstanpy - INFO - Chain [1] start processing
12:10:06 - cmdstanpy - INFO - Chain [1] done processing

Tuning Seasonality -¶

In [52]:
m_seasonal = Prophet()
m_seasonal.add_seasonality(name='monthly_custom', period=30.5, fourier_order=5)
m_seasonal.fit(monthly_df)
forecast_seasonal = m_seasonal.predict(future)
models['Seasonality'] = forecast_seasonal
12:10:07 - cmdstanpy - INFO - Chain [1] start processing
12:10:07 - cmdstanpy - INFO - Chain [1] done processing

Tuning Changepoints -¶

In [53]:
m_changepoints = Prophet(n_changepoints=30, changepoint_prior_scale=0.1)
m_changepoints.fit(monthly_df)
forecast_cp = m_changepoints.predict(future)
models['Changepoints'] = forecast_cp
12:10:08 - cmdstanpy - INFO - Chain [1] start processing
12:10:08 - cmdstanpy - INFO - Chain [1] done processing

Results -¶

In [54]:
last_date = monthly_df['ds'].max()

future_predictions = models['Default'][models['Default']['ds'] > last_date][['ds', 'yhat']].copy()
future_predictions.rename(columns={'yhat': 'Default'}, inplace=True)

for model_name, forecast in models.items():
    if model_name != 'Default':
        temp = forecast[forecast['ds'] > last_date][['ds', 'yhat']].copy()
        temp.rename(columns={'yhat': model_name}, inplace=True)
        future_predictions = future_predictions.merge(temp, on='ds', how='left')

future_predictions = future_predictions.round(2)

print(future_predictions)
          ds   Default  Growth (logistic)  Seasonality  Changepoints
0 2024-12-31  24853.64           24728.90     24658.18      24864.13
1 2025-01-31  22563.34           22418.57     21359.47      22501.32
2 2025-02-28  21609.57           21315.89     33368.67      21434.45
3 2025-03-31  21144.18           20903.97     28485.90      20938.85
4 2025-04-30  23317.98           22906.50     35150.36      22859.16
5 2025-05-31  25861.85           25825.74     33510.52      25997.74
6 2025-06-30  35691.67           35771.13     48040.21      35967.51
7 2025-07-31  36500.11           36155.29     43759.04      36259.88
8 2025-08-31  32938.52           32451.21     36202.31      32575.29

Evaluating the models -¶

In [114]:
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score

# Default model
forecast_default = m_default.predict(monthly_df[['ds']])
y_true = monthly_df['y'].values
y_pred = forecast_default['yhat'].values
mae_default = mean_absolute_error(y_true, y_pred)
mape_default = mean_absolute_percentage_error(y_true, y_pred)
r2_default = r2_score(y_true, y_pred)

# Logistic model
monthly_df['cap'] = monthly_df['y'].max() * 1.2
forecast_logistic = m_growth.predict(monthly_df[['ds', 'cap']])
y_pred_logistic = forecast_logistic['yhat'].values
mae_logistic = mean_absolute_error(y_true, y_pred_logistic)
mape_logistic = mean_absolute_percentage_error(y_true, y_pred_logistic)
r2_logistic = r2_score(y_true, y_pred_logistic)

# Seasonal model
forecast_seasonal = m_seasonal.predict(monthly_df[['ds']])
y_pred_seasonal = forecast_seasonal['yhat'].values
mae_seasonal = mean_absolute_error(y_true, y_pred_seasonal)
mape_seasonal = mean_absolute_percentage_error(y_true, y_pred_seasonal)
r2_seasonal = r2_score(y_true, y_pred_seasonal)

# Changepoint model
forecast_cp = m_changepoints.predict(monthly_df[['ds']])
y_pred_cp = forecast_cp['yhat'].values
mae_cp = mean_absolute_error(y_true, y_pred_cp)
mape_cp = mean_absolute_percentage_error(y_true, y_pred_cp)
r2_cp = r2_score(y_true, y_pred_cp)

results_df = pd.DataFrame([
    {'Model': 'Default', 'MAE': round(mae_default, 2), 'MAPE': round(mape_default, 4), 'R2': round(r2_default, 4)},
    {'Model': 'Growth (logistic)', 'MAE': round(mae_logistic, 2), 'MAPE': round(mape_logistic, 4), 'R2': round(r2_logistic, 4)},
    {'Model': 'Seasonality', 'MAE': round(mae_seasonal, 2), 'MAPE': round(mape_seasonal, 4), 'R2': round(r2_seasonal, 4)},
    {'Model': 'Changepoints', 'MAE': round(mae_cp, 2), 'MAPE': round(mape_cp, 4), 'R2': round(r2_cp, 4)}
])

print(results_df)
               Model      MAE    MAPE      R2
0            Default  1357.00  0.0419  0.9369
1  Growth (logistic)  1457.61  0.0451  0.9197
2        Seasonality  1301.49  0.0399  0.9397
3       Changepoints  1352.28  0.0416  0.9378

Visualizing the results -¶

In [57]:
models = {
    'Default': forecast_default,
    'Growth (logistic)': forecast_growth,
    'Seasonality': forecast_seasonal,
    'Changepoints': forecast_cp
}
In [58]:
import matplotlib.pyplot as plt

plt.figure(figsize=(14, 6))

for model_name, forecast in models.items():
    future_pred = forecast[forecast['ds'] > monthly_df['ds'].max()]
    plt.plot(future_pred['ds'], future_pred['yhat'], label=model_name)

plt.title("Forecasted Monthly Electricity Consumption (All Models)")
plt.xlabel("Date")
plt.ylabel("Predicted Consumption (KWH)")
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image

Monthly consumption, grouped by Borough¶


In [59]:
df['Date'] = pd.to_datetime(df['Date'])
monthly_borough = (
    df.groupby([df['Borough'], df['Date'].dt.to_period('M')])['Consumption (KWH)']
    .sum()
    .reset_index()
)
monthly_borough['Date'] = monthly_borough['Date'].dt.to_timestamp()
In [60]:
from prophet import Prophet

borough_forecasts = {}

exclude = ['NON DEVELOPMENT FACILITY', 'FHA']
boroughs = monthly_borough[~monthly_borough['Borough'].isin(exclude)]['Borough'].unique()

for b in boroughs:
    
    temp_df = monthly_borough[monthly_borough['Borough'] == b][['Date', 'Consumption (KWH)']].copy()
    temp_df.rename(columns={'Date': 'ds', 'Consumption (KWH)': 'y'}, inplace=True)
    
    m = Prophet()
    m.fit(temp_df)
    
    future = m.make_future_dataframe(periods=9, freq='M')
    forecast = m.predict(future)
    
    borough_forecasts[b] = forecast
    
    fig = m.plot(forecast)
    plt.title(f"Forecast for {b}")
    plt.xlabel("Date")
    plt.ylabel("Consumption (KWH)")
    plt.tight_layout()
    plt.show()
12:11:06 - cmdstanpy - INFO - Chain [1] start processing
12:11:06 - cmdstanpy - INFO - Chain [1] done processing
No description has been provided for this image
12:11:07 - cmdstanpy - INFO - Chain [1] start processing
12:11:07 - cmdstanpy - INFO - Chain [1] done processing
No description has been provided for this image
12:11:07 - cmdstanpy - INFO - Chain [1] start processing
12:11:07 - cmdstanpy - INFO - Chain [1] done processing
No description has been provided for this image
12:11:08 - cmdstanpy - INFO - Chain [1] start processing
12:11:08 - cmdstanpy - INFO - Chain [1] done processing
No description has been provided for this image
12:11:08 - cmdstanpy - INFO - Chain [1] start processing
12:11:08 - cmdstanpy - INFO - Chain [1] done processing
No description has been provided for this image
In [61]:
plt.figure(figsize=(14, 6))

last_date = monthly_borough['Date'].max()

for b, forecast in borough_forecasts.items():
    future_rows = forecast[forecast['ds'] > last_date]
    plt.plot(future_rows['ds'], future_rows['yhat'], label=b)

plt.title("9-Month Forecasted Electricity Consumption by Borough")
plt.xlabel("Date")
plt.ylabel("Predicted Consumption (KWH)")
plt.xticks(rotation=45)
plt.legend(title="Borough")
plt
Out[61]:
<module 'matplotlib.pyplot' from 'D:\\Prog_Softwares\\Anaconda\\lib\\site-packages\\matplotlib\\pyplot.py'>
No description has been provided for this image

Borough-wise Consumption Analysis¶

In [62]:
borough_total = df.groupby('Borough')['Consumption (KWH)'].sum().reset_index()

borough_total = borough_total[~borough_total['Borough'].isin(['NON DEVELOPMENT FACILITY', 'FHA'])]

plt.figure(figsize=(10, 6))
sns.barplot(data=borough_total, x='Borough', y='Consumption (KWH)', palette='viridis')
plt.title("Total Electricity Consumption by Borough")
plt.xticks(rotation=45)
plt.ylabel("Total Consumption (KWH)")
plt.tight_layout()
plt.show()
No description has been provided for this image

image.png

In [63]:
plt.figure(figsize=(8, 8))
plt.pie(borough_total['Consumption (KWH)'], labels=borough_total['Borough'], autopct='%1.1f%%', startangle=140)
plt.title("Electricity Consumption Share by Borough")
plt.axis('equal')  
plt.show()
No description has been provided for this image
In [64]:
import seaborn as sns
import matplotlib.pyplot as plt

monthly_filtered = monthly_borough[~monthly_borough['Borough'].isin(['NON DEVELOPMENT FACILITY', 'FHA'])]

plt.figure(figsize=(14, 6))
sns.lineplot(data=monthly_filtered, x='Date', y='Consumption (KWH)', hue='Borough')
plt.title("Monthly Electricity Consumption Trends by Borough")
plt.xlabel("Date")
plt.ylabel("Consumption (KWH)")
plt.xticks(rotation=45)
plt.legend(title="Borough")
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]: