Showing 59 changed files with 18,585 additions and 11,507 deletions.
@@ -0,0 +1,176 @@
########################################################
# Sample enterprises blueprint of endpoints
# Remove this file if you are not using it in your project
########################################################

from flask import Blueprint, jsonify
from backend.db_connection import db

enterprises = Blueprint('enterprises', __name__)


# Get all of the enterprise tags from the database
@enterprises.route('/tags', methods=['GET'])
def get_tags():
    # get a cursor object from the database
    cursor = db.get_db().cursor()

    cursor.execute('''
        SELECT description
        FROM EmissionTags
        WHERE EmissionTags.id IN (
            SELECT tag_id
            FROM EntTags
            WHERE EntTags.enterprise_id = 1
        );
    ''')

    # grab the column headers from the returned data
    column_headers = [x[0] for x in cursor.description]

    # create an empty list to pair column headers with row data
    json_data = []

    # fetch all the data from the cursor
    theData = cursor.fetchall()

    # for each of the rows, zip the data elements together with
    # the column headers
    for row in theData:
        json_data.append(dict(zip(column_headers, row)))

    return jsonify(json_data)


# Get all of the matching NGOs based on shared emission tags
@enterprises.route('/NGOMatch', methods=['GET'])
def get_matches():
    # get a cursor object from the database
    cursor = db.get_db().cursor()

    cursor.execute('''
        SELECT NGO.name, EmissionTags.description
        FROM NGO
        JOIN NGOTags ON NGO.id = NGOTags.ngo_id
        JOIN EmissionTags ON NGOTags.tag_id = EmissionTags.id
        WHERE EmissionTags.id IN (
            SELECT tag_id
            FROM EntTags
            WHERE EntTags.enterprise_id = 1
        );
    ''')

    # grab the column headers from the returned data
    column_headers = [x[0] for x in cursor.description]

    # create an empty list to pair column headers with row data
    json_data = []

    # fetch all the data from the cursor
    theData = cursor.fetchall()

    # for each of the rows, zip the data elements together with
    # the column headers
    for row in theData:
        json_data.append(dict(zip(column_headers, row)))

    return jsonify(json_data)


# Get this enterprise's emissions alongside the average emissions
# of companies in the same country
@enterprises.route('/EntCompare', methods=['GET'])
def get_comparison():
    # get a cursor object from the database
    cursor = db.get_db().cursor()

    # the subqueries pin both the comparison country and the
    # 'Your Emissions' column to enterprise 1
    cursor.execute('''
        SELECT AVG(Enterprises.emission_result) AS 'Average Emission (by Country)',
               Country.name AS 'Country',
               (SELECT e2.emission_result
                FROM Enterprises e2
                WHERE e2.id = 1) AS 'Your Emissions'
        FROM Enterprises
        JOIN Country ON Enterprises.country_id = Country.id
        WHERE Country.name =
            (SELECT Country.name
             FROM Enterprises
             JOIN Country ON Enterprises.country_id = Country.id
             WHERE Enterprises.id = 1
             LIMIT 1)
        GROUP BY Country.name;
    ''')

    # grab the column headers from the returned data
    column_headers = [x[0] for x in cursor.description]

    # create an empty list to pair column headers with row data
    json_data = []

    # fetch all the data from the cursor
    theData = cursor.fetchall()

    # for each of the rows, zip the data elements together with
    # the column headers
    for row in theData:
        json_data.append(dict(zip(column_headers, row)))

    return jsonify(json_data)


# Get all the supply chain history for this enterprise
@enterprises.route('/EntSupplyChain', methods=['GET'])
def get_supplychain():
    cursor = db.get_db().cursor()

    cursor.execute('SELECT * FROM SupplyChain WHERE SupplyChain.enterprise_id = 1')

    column_headers = [x[0] for x in cursor.description]

    json_data = []

    theData = cursor.fetchall()

    for row in theData:
        json_data.append(dict(zip(column_headers, row)))

    return jsonify(json_data)


# Get all the operating cost history for this enterprise
@enterprises.route('/EntCosts', methods=['GET'])
def get_costs():
    cursor = db.get_db().cursor()

    cursor.execute('SELECT * FROM operatingEmission WHERE operatingEmission.enterprise_id = 1')

    column_headers = [x[0] for x in cursor.description]

    json_data = []

    theData = cursor.fetchall()

    for row in theData:
        json_data.append(dict(zip(column_headers, row)))

    return jsonify(json_data)


# Get all the flights history for this enterprise
@enterprises.route('/EntFlights', methods=['GET'])
def get_flights():
    cursor = db.get_db().cursor()

    cursor.execute('SELECT * FROM Flight WHERE Flight.enterprise_id = 1')

    column_headers = [x[0] for x in cursor.description]

    json_data = []

    theData = cursor.fetchall()

    for row in theData:
        json_data.append(dict(zip(column_headers, row)))

    return jsonify(json_data)
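
Every route above repeats the same cursor-to-JSON steps. A minimal refactoring sketch, not part of the commit: the helper name rows_to_json is hypothetical, and each route body would shrink to an execute() call plus return jsonify(rows_to_json(cursor)).

# hypothetical helper, not in the commit: folds the repeated
# fetch-headers-and-zip pattern used by every route into one call
def rows_to_json(cursor):
    # column names come from the cursor's result metadata
    column_headers = [x[0] for x in cursor.description]
    # zip each fetched row with the headers to build a list of dicts
    return [dict(zip(column_headers, row)) for row in cursor.fetchall()]
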
File renamed without changes.
@@ -0,0 +1,212 @@
""" | ||
The Train, Test, and Predict functions for the CO2 Emission Linear Regression | ||
ML Model | ||
""" | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import pandasdmx as sdmx | ||
from sklearn.metrics import r2_score | ||
from functools import reduce | ||
import pandasdmx as sdmx | ||
|
||


def train() -> np.array:
    """
    Calculates the slopes for the CO2 emissions regression model.
    :returns: An array with the slopes in shape (3,)
    """
    # total greenhouse gas emissions per country (thousand tonnes)
    estat = sdmx.Request("ESTAT")
    resp = estat.data(
        "ENV_AIR_GGE",
        key={
            "unit": "THS_T",
            "freq": "A",
            "src_crf": "TOTX4_MEMONIA",
            "airpol": "GHG"
        }
    )
    emission_df = (resp
                   .to_pandas(datetime={'dim': 'TIME_PERIOD'})
                   .droplevel(level=['unit', 'freq', 'src_crf', 'airpol'], axis=1))
    melted_emissions_df = melt_smdx_dataframe(emission_df)

    # household energy consumption per country (TJ)
    resp = estat.data(
        "NRG_D_HHQ",
        key={
            "siec": "TOTAL",
            "unit": "TJ",
            "nrg_bal": "FC_OTH_HH_E",
            "freq": "A",
        }
    )
    household_energy_df = (resp
                           .to_pandas(datetime={'dim': 'TIME_PERIOD', 'freq': 'freq'})
                           .droplevel(level=["siec", "unit", "nrg_bal"], axis=1))
    melted_household_energy_df = melt_smdx_dataframe(household_energy_df)

    # road-transport motor gasoline consumption per country (ktoe)
    resp = estat.data(
        "TEN00127",
        key={
            "unit": "KTOE",
            "freq": "A",
            "siec": "O4652XR5210B",
            "nrg_bal": "FC_TRA_ROAD_E"
        }
    )
    gas_df = (resp
              .to_pandas(datetime={'dim': 'TIME_PERIOD'})
              .droplevel(level=['unit', 'freq', 'siec', "nrg_bal"], axis=1))
    melted_gas_df = melt_smdx_dataframe(gas_df)

    merged_df = merge_dataframes([melted_emissions_df,
                                  melted_household_energy_df,
                                  melted_gas_df])
    merged_df.columns = ["year", "geo", "emissions", "energy", "gas"]
    # drop the EU-wide aggregate rows so only individual countries remain
    merged_df = merged_df.drop(merged_df[(merged_df.geo == "EU27_2020") |
                                         (merged_df.geo == "EU20")].index)
    merged_df = merged_df.drop("year", axis=1)
    standard_df = standardize(merged_df)

    df_dummies = pd.get_dummies(standard_df, dtype=int, columns=["geo"])
    df_dummies = df_dummies.fillna(0)

    # X = np.pad(df_dummies.iloc[:, 1:].to_numpy(dtype=np.float64),
    #            ((0, 0), (1, 0)), mode="constant", constant_values=1)
    X = np.pad(standard_df.iloc[:, 1:3].to_numpy(dtype=np.float64),
               ((0, 0), (1, 0)), mode="constant", constant_values=1)
    y = np.array(df_dummies["emissions"], dtype=np.float64)

    m = np.matmul(np.linalg.inv(np.matmul(X.T, X)), np.matmul(X.T, y))

    return m
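
The last line of train() is a closed-form ordinary least squares fit: with X padded by a leading column of ones for the intercept, it solves the normal equations

    \hat{m} = (X^{\top} X)^{-1} X^{\top} y

which gives the exact least-squares slopes whenever X^T X is invertible. (np.linalg.lstsq would be the numerically more robust alternative; it is mentioned here only as an aside, not used in the commit.)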


def test(X: np.array, y: np.array) -> any:
    """
    Tests the CO2 emissions regression model.
    :param X: The padded X features
    :param y: The y features
    :returns: The R2 value of the model w/ LOO-CV
    """
    # drop row i from an array; fit OLS slopes via the normal equations
    np_remove = lambda a, i: np.concatenate([a[:i, ], a[i + 1:, ]])
    lin_reg = lambda X, Y: np.matmul(np.linalg.inv(np.matmul(X.T, X)),
                                     np.matmul(X.T, Y))

    y_pred = []
    for i in range(len(X)):
        holdout_X = X[i]

        # refit the model with observation i held out
        loo_X = np_remove(X, i)
        loo_y = np_remove(y, i)
        loo_b = lin_reg(loo_X, loo_y)

        # predict the held-out observation
        y_hat = np.matmul(holdout_X, loo_b)
        y_pred.append(y_hat)

    r2 = r2_score(y, y_pred)

    return r2
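
A self-contained sanity check for test(); the toy arrays below are made up for illustration, not drawn from the source data:

# toy data, illustration only: y is an exact linear function of the index,
# so every leave-one-out prediction is exact and the R2 score is 1.0
X_toy = np.pad(np.arange(10, dtype=np.float64).reshape(-1, 1),
               ((0, 0), (1, 0)), mode="constant", constant_values=1)
y_toy = 2.0 + 3.0 * np.arange(10, dtype=np.float64)
print(test(X_toy, y_toy))  # ~1.0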


def predict(feats: list[float], beta: list[float]) -> float:
    """
    Predicts the greenhouse gas emissions for an individual user in ktonnes.
    :param feats: The unpadded input features from the user:
                  - Motor gasoline in ktoe
                  - Household energy in TJ
    :param beta: The slopes (and intercept) for the trained model of shape (3,)
    :returns: The predicted greenhouse gas emission in CO2 equivalents,
              measured in ktonnes
    """
    # prepend a 1 so the intercept term in beta is applied
    x = np.concatenate([[1], np.array(feats, dtype=np.float64)])
    beta = np.array(beta, dtype=np.float64)
    y_hat = np.matmul(x, beta)

    return y_hat
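
A minimal end-to-end usage sketch. The feature values are illustrative placeholders, train() needs live access to the Eurostat SDMX API, and note that train() fits on standardized features, so the raw values here only illustrate the call shape:

# fit the slopes, then predict for a hypothetical user:
# 120.0 ktoe of motor gasoline, 3500.0 TJ of household energy
beta = train()
y_hat = predict([120.0, 3500.0], beta)
print(f"Predicted emissions: {y_hat:.1f} ktonnes CO2e")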


def melt_smdx_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Given an ESTAT SDMX dataframe, convert the datetimes to years and melt.
    :param df: The raw SDMX parsed dataframe from ESTAT
    :returns: A melted dataframe with the columns of:
              `year` - the year of the observation
              `geo` - the country of the observation
              `value` - the value of the observation
    """
    df = df.reset_index()
    df["year"] = df["TIME_PERIOD"].dt.year
    df = df.drop("TIME_PERIOD", axis=1)
    return pd.melt(df, id_vars="year")


def merge_dataframes(dataframes: list[pd.DataFrame]) -> pd.DataFrame:
    """
    Inner-joins a list of melted dataframes on their shared `year` and `geo`
    columns, leaving one value column (named by index) per input dataframe.
    :param dataframes: The melted dataframes to merge
    :returns: A single dataframe keyed on `year` and `geo`
    """
    for i, df in enumerate(dataframes):
        df.columns = ["geo", "year", i]

    merged_df = reduce(lambda l, r: pd.merge(l, r, left_on=["year", "geo"],
                                             right_on=["year", "geo"]),
                       dataframes)
    return merged_df


def fill_holes(df: pd.DataFrame) -> pd.DataFrame:
    """
    Imputes missing (NaN or zero) values in each value column by fitting a
    per-country linear regression over the row index and predicting the gaps.
    :param df: A dataframe with a `geo` column followed by value columns
    :returns: A dataframe of the same shape with the gaps filled in
    """
    lin_reg = lambda X, Y: np.matmul(np.linalg.inv(np.matmul(X.T, X)),
                                     np.matmul(X.T, Y))

    dfs = []

    for name, group in df.groupby('geo'):
        cols = [[name for _ in range(len(group.index))]]
        for i in range(1, len(group.columns)):
            d = group.iloc[:, i:i+1].to_numpy()

            # treat NaNs and zeros as holes to be imputed
            missing_mask = np.isnan(d) | (d == 0)
            present_mask = ~missing_mask

            missing_mask = missing_mask.reshape(1, -1)[0]
            present_mask = present_mask.reshape(1, -1)[0]

            # nothing missing: keep the column as-is
            if not np.any(missing_mask):
                d = d.reshape(1, -1)[0]
                cols.append(d)
                continue

            # nothing present: there is no data to regress on
            if not np.any(present_mask):
                d = d.reshape(1, -1)[0]
                cols.append(d)
                continue

            # fit a line through the observed points (index -> value)
            x_present = np.pad(np.arange(len(d))[present_mask].reshape(-1, 1),
                               ((0, 0), (1, 0)), mode="constant", constant_values=1)
            y_present = d[present_mask]

            w = lin_reg(x_present, y_present)

            # predict the missing points from their positions on that line
            x_missing = np.pad(np.arange(len(d))[missing_mask].reshape(-1, 1),
                               ((0, 0), (1, 0)), mode="constant", constant_values=1)
            y_missing_pred = np.matmul(x_missing, w)

            d[missing_mask] = y_missing_pred
            d = d.reshape(1, -1)[0]

            cols.append(d)

        dfs.append(pd.DataFrame(cols).T)

    df_unswissed = pd.concat(dfs, axis=0)
    df_unswissed.columns = df.columns
    return df_unswissed
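
A self-contained check of fill_holes(); the toy frame is made up for illustration:

# toy frame, illustration only: one country with a zero "hole" in an
# otherwise linear series; the regression imputes the gap to ~3.0
toy = pd.DataFrame({"geo": ["AA"] * 5,
                    "vals": [1.0, 2.0, 0.0, 4.0, 5.0]})
print(fill_holes(toy))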


def standardize(df: pd.DataFrame) -> pd.DataFrame:
    """
    Z-score standardizes every column except `geo`.
    :param df: The dataframe to standardize
    :returns: A new dataframe with standardized value columns and `geo` intact
    """
    df_standard = pd.DataFrame()
    for feat in df.columns:
        if feat == "geo":
            continue
        df_standard[f'{feat}'] = (df[feat] - df[feat].mean()) / df[feat].std()
    df_standard["geo"] = df["geo"]

    return df_standard