Skip to content

Commit

Permalink
Phase3 OVER WOOOOO
Browse files Browse the repository at this point in the history
  • Loading branch information
aahiltn authored Jun 6, 2024
2 parents d45cce3 + 13f1386 commit a13089b
Show file tree
Hide file tree
Showing 59 changed files with 18,585 additions and 11,507 deletions.
176 changes: 176 additions & 0 deletions api/backend/enterprises/enterprises_routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
########################################################
# Enterprises blueprint: endpoints for an enterprise's
# emission tags, NGO matching, country emission
# comparisons, and supply-chain / cost / flight history
########################################################

from flask import Blueprint, request, jsonify, make_response, current_app
import json
from backend.db_connection import db


enterprises = Blueprint('enterprises', __name__)

# get all of the enterprise tags from database
@enterprises.route('/tags', methods=['GET'])
def get_tags():
    """
    Return the emission-tag descriptions attached to an enterprise.

    Query parameters:
        enterprise_id (int, optional): enterprise to look up; defaults to 1
            (the previously hard-coded value) for backward compatibility.

    :returns: JSON list of rows, each {"description": ...}
    """
    # Generalized: the enterprise id used to be hard-coded to 1.
    enterprise_id = request.args.get('enterprise_id', default=1, type=int)

    # get a cursor object from the database
    cursor = db.get_db().cursor()

    # Parameterized query — never interpolate request input into SQL.
    # NOTE(review): assumes a DB-API driver with 'format' paramstyle
    # (e.g. PyMySQL) — confirm against db_connection.
    cursor.execute('''
        SELECT description
        FROM EmissionTags
        WHERE EmissionTags.id IN (
            SELECT tag_id
            FROM EntTags
            WHERE EntTags.enterprise_id = %s
        );
    ''', (enterprise_id,))

    # zip each row with the returned column headers to build JSON objects
    column_headers = [x[0] for x in cursor.description]
    json_data = [dict(zip(column_headers, row)) for row in cursor.fetchall()]

    return jsonify(json_data)


# get all of the matching NGO's based on tags
@enterprises.route('/NGOMatch', methods=['GET'])
def get_matches():
    """
    Return NGOs whose emission tags overlap with the enterprise's tags.

    Query parameters:
        enterprise_id (int, optional): enterprise to match against;
            defaults to 1 (the previously hard-coded value).

    :returns: JSON list of rows, each {"name": ..., "description": ...}
    """
    # Generalized: the enterprise id used to be hard-coded to 1.
    enterprise_id = request.args.get('enterprise_id', default=1, type=int)

    # get a cursor object from the database
    cursor = db.get_db().cursor()

    # Parameterized query — never interpolate request input into SQL.
    # NOTE(review): assumes a 'format' paramstyle driver (e.g. PyMySQL).
    cursor.execute('''
        SELECT NGO.name, EmissionTags.description
        FROM NGO
        JOIN NGOTags ON NGO.id = NGOTags.ngo_id
        JOIN EmissionTags ON NGOTags.tag_id = EmissionTags.id
        WHERE EmissionTags.id IN (
            SELECT tag_id
            FROM EntTags
            WHERE EntTags.enterprise_id = %s
        );
    ''', (enterprise_id,))

    # zip each row with the returned column headers to build JSON objects
    column_headers = [x[0] for x in cursor.description]
    json_data = [dict(zip(column_headers, row)) for row in cursor.fetchall()]

    return jsonify(json_data)


# get my emissions, my country's, and avg other companies in same country emissions
@enterprises.route('/EntCompare', methods=['GET'])
def get_comparison():
    """
    Compare an enterprise's emissions against the average emissions of
    enterprises in the same country.

    Query parameters:
        enterprise_id (int, optional): enterprise to compare; defaults to 1
            (the previously hard-coded value).

    :returns: JSON list with one row containing the country average,
              the country name, and the enterprise's own emissions.
    """
    # Generalized: the enterprise id used to be hard-coded to 1.
    enterprise_id = request.args.get('enterprise_id', default=1, type=int)

    # get a cursor object from the database
    cursor = db.get_db().cursor()

    # Parameterized query — the same id is used twice (own emissions
    # subquery, and the country-lookup subquery).
    # NOTE(review): assumes a 'format' paramstyle driver (e.g. PyMySQL).
    cursor.execute('''
        SELECT AVG(Enterprises.emission_result) AS 'Average Emission (by Country)',
               Country.name AS 'Country',
               (SELECT e2.emission_result
                FROM Enterprises e2
                WHERE e2.id = %s) AS 'Your Emissions'
        FROM Enterprises
        JOIN Country ON Enterprises.country_id = Country.id
        WHERE Country.name =
              (SELECT Country.name
               FROM Enterprises
               JOIN Country ON Enterprises.country_id = Country.id
               WHERE Enterprises.id = %s
               LIMIT 1)
        GROUP BY Country.name;
    ''', (enterprise_id, enterprise_id))

    # zip each row with the returned column headers to build JSON objects
    column_headers = [x[0] for x in cursor.description]
    json_data = [dict(zip(column_headers, row)) for row in cursor.fetchall()]

    return jsonify(json_data)


# Get all the supply chain history for this enterprise
@enterprises.route('/EntSupplyChain', methods=['GET'])
def get_supplychain():
    """
    Return all SupplyChain rows for an enterprise.

    Query parameters:
        enterprise_id (int, optional): defaults to 1 (the previously
            hard-coded value).

    :returns: JSON list of SupplyChain rows as objects.
    """
    # Generalized: the enterprise id used to be hard-coded to 1.
    enterprise_id = request.args.get('enterprise_id', default=1, type=int)

    cursor = db.get_db().cursor()

    # Parameterized query — never interpolate request input into SQL.
    cursor.execute(
        'SELECT * FROM SupplyChain WHERE SupplyChain.enterprise_id = %s',
        (enterprise_id,))

    # zip each row with the returned column headers to build JSON objects
    column_headers = [x[0] for x in cursor.description]
    json_data = [dict(zip(column_headers, row)) for row in cursor.fetchall()]

    return jsonify(json_data)

# Get all the operating cost history for this enterprise
@enterprises.route('/EntCosts', methods=['GET'])
def get_costs():
    """
    Return all operatingEmission rows for an enterprise.

    Query parameters:
        enterprise_id (int, optional): defaults to 1 (the previously
            hard-coded value).

    :returns: JSON list of operatingEmission rows as objects.
    """
    # Generalized: the enterprise id used to be hard-coded to 1.
    enterprise_id = request.args.get('enterprise_id', default=1, type=int)

    cursor = db.get_db().cursor()

    # Parameterized query — never interpolate request input into SQL.
    cursor.execute(
        'SELECT * FROM operatingEmission WHERE operatingEmission.enterprise_id = %s',
        (enterprise_id,))

    # zip each row with the returned column headers to build JSON objects
    column_headers = [x[0] for x in cursor.description]
    json_data = [dict(zip(column_headers, row)) for row in cursor.fetchall()]

    return jsonify(json_data)

# Get all the flights history for this enterprise
@enterprises.route('/EntFlights', methods=['GET'])
def get_flights():
    """
    Return all Flight rows for an enterprise.

    Query parameters:
        enterprise_id (int, optional): defaults to 1 (the previously
            hard-coded value).

    :returns: JSON list of Flight rows as objects.
    """
    # Generalized: the enterprise id used to be hard-coded to 1.
    enterprise_id = request.args.get('enterprise_id', default=1, type=int)

    cursor = db.get_db().cursor()

    # Parameterized query — never interpolate request input into SQL.
    cursor.execute(
        'SELECT * FROM Flight WHERE Flight.enterprise_id = %s',
        (enterprise_id,))

    # zip each row with the returned column headers to build JSON objects
    column_headers = [x[0] for x in cursor.description]
    json_data = [dict(zip(column_headers, row)) for row in cursor.fetchall()]

    return jsonify(json_data)
File renamed without changes.
212 changes: 212 additions & 0 deletions api/backend/ml_models/model_alpha.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
"""
The Train, Test, and Predict functions for the CO2 Emission Linear Regression
ML Model
"""

import numpy as np
import pandas as pd
import pandasdmx as sdmx
from sklearn.metrics import r2_score
from functools import reduce
import pandasdmx as sdmx

def train() -> np.array:
    """
    Calculates the slopes for the CO2 emissions regression model.

    Fetches three annual Eurostat (ESTAT) datasets over SDMX, melts and
    merges them per country/year, standardizes the numeric columns, and
    fits an OLS model via the normal equations.

    :returns: An array with the intercept and slopes in shape (3,)
    """
    estat = sdmx.Request("ESTAT")
    # Total greenhouse-gas emissions (ENV_AIR_GGE), thousand tonnes, annual.
    resp = estat.data(
        "ENV_AIR_GGE",
        key={
            "unit": "THS_T",
            "freq": "A",
            "src_crf": "TOTX4_MEMONIA",
            "airpol": "GHG"
        }
    )
    # Drop the constant key levels so only the country level remains.
    emission_df = (resp
                   .to_pandas(datetime={'dim': 'TIME_PERIOD'})
                   .droplevel(level=['unit', 'freq', 'src_crf', 'airpol'], axis=1))
    melted_emissions_df = melt_smdx_dataframe(emission_df)

    # Household final energy consumption (NRG_D_HHQ), terajoules, annual.
    resp = estat.data(
        "NRG_D_HHQ",
        key={
            "siec": "TOTAL",
            "unit": "TJ",
            "nrg_bal": "FC_OTH_HH_E",
            "freq": "A",
        }
    )
    household_energy_df = (resp
                           .to_pandas(datetime={'dim': 'TIME_PERIOD', 'freq': 'freq'})
                           .droplevel(level=["siec", "unit", "nrg_bal"], axis=1))
    melted_household_energy_df = melt_smdx_dataframe(household_energy_df)

    # Road-transport motor gasoline consumption (TEN00127), ktoe, annual.
    resp = estat.data(
        "TEN00127",
        key={
            "unit": "KTOE",
            "freq": "A",
            "siec": "O4652XR5210B",
            "nrg_bal": "FC_TRA_ROAD_E"
        }
    )
    gas_df = (resp
              .to_pandas(datetime={'dim': 'TIME_PERIOD'})
              .droplevel(level=['unit', 'freq', 'siec', "nrg_bal"], axis=1))
    melted_gas_df = melt_smdx_dataframe(gas_df)

    # Inner-join the three indicators on (year, geo); merge_dataframes
    # labels the merged key columns swapped, so this rename restores
    # the correct year/geo labels.
    merged_df = merge_dataframes([melted_emissions_df,
                                  melted_household_energy_df,
                                  melted_gas_df])
    merged_df.columns = ["year", "geo", "emissions", "energy", "gas"]
    # Drop EU-wide aggregate rows so only individual countries remain.
    # NOTE(review): "EU20" looks like it may be a typo for another EU
    # aggregate code (e.g. "EU28") — verify against the dataset's geo codes.
    merged_df = merged_df.drop(merged_df[(merged_df.geo == "EU27_2020") |
                                         (merged_df.geo == "EU20")].index)
    merged_df = merged_df.drop("year", axis=1)
    standard_df = standardize(merged_df)

    # Country dummies are built but only used for y below; the dummy-based
    # design matrix remains commented out.
    df_dummies = pd.get_dummies(standard_df, dtype=int, columns=["geo"])
    df_dummies = df_dummies.fillna(0)

    #X = np.pad(df_dummies.iloc[:, 1:].to_numpy(dtype=np.float64),
    #           ((0,0), (1,0)), mode="constant", constant_values=1)
    # Design matrix: [1, energy, gas] (standard_df columns are
    # emissions, energy, gas, geo — iloc[:, 1:3] selects energy and gas).
    X = np.pad(standard_df.iloc[:, 1:3].to_numpy(dtype=np.float64),
               ((0, 0), (1, 0)), mode="constant", constant_values=1)
    y = np.array(df_dummies["emissions"], dtype=np.float64)

    # OLS via the normal equations: m = (X^T X)^-1 X^T y
    m = np.matmul(np.linalg.inv(np.matmul(X.T, X)), np.matmul(X.T, y))

    return m


def test(X: np.array, y: np.array) -> any:
"""
Tests the CO2 emissions regression model
:param X: The padded X features
:param y: The y features
:returns: The R2 value of the model w/ LOO-CV
"""
np_remove = lambda a, i: np.concatenate([a[:i,], a[i + 1:,]])
lin_reg = lambda X, Y: np.matmul(np.linalg.inv(np.matmul(X.T, X)),
np.matmul(X.T, Y))

y_pred = []
for i in range(len(X)):
holdout_X = X[i]

loo_X = np_remove(X, i)
loo_y = np_remove(y, i)
loo_b = lin_reg(loo_X, loo_y)

y_hat = np.matmul(holdout_X, loo_b)
y_pred.append(y_hat)

r2 = r2_score(y, y_pred)

return r2


def predict(feats: list[float], beta: list[float]) -> float:
    """
    Predict the greenhouse-gas emissions for an individual user in ktonnes.

    :param feats: The unpadded input features from the user:
                  - motor gasoline consumption in ktoe
                  - household energy consumption in TJ
    :param beta: The intercept and slopes of the trained model, shape (3,)
    :returns: The predicted greenhouse-gas emissions in CO2 equivalents,
              measured in ktonnes
    """
    # Prepend the intercept term, then take the dot product with beta.
    padded = np.concatenate([[1], np.array(feats, dtype=np.float64)])
    coeffs = np.array(beta, dtype=np.float64)
    return np.dot(padded, coeffs)



def melt_smdx_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Given an ESTAT SDMX dataframe, convert the datetimes to years and melt.

    :param df: The raw SDMX-parsed dataframe from ESTAT, with a datetime
               "TIME_PERIOD" index
    :returns: A melted dataframe with the columns of:
        `year` - the year of the observation
        `geo` (variable) - the country of the observation
        `value` - the value of the observation
    """
    flat = df.reset_index()
    # Replace the full timestamp with its calendar year.
    flat["year"] = flat["TIME_PERIOD"].dt.year
    flat = flat.drop(columns=["TIME_PERIOD"])
    return flat.melt(id_vars="year")

def merge_dataframes(dataframes: list[pd.DataFrame]) -> pd.DataFrame:
    """
    Inner-join a list of melted indicator dataframes on year and country.

    Each input dataframe must have exactly three columns; they are
    relabelled IN PLACE to ["geo", "year", i] (i = position in the list)
    before joining.

    NOTE(review): the labels are applied to the melted (year, variable,
    value) columns in that order, so "geo" actually holds years and
    "year" holds country codes; train() renames the merged columns in the
    same swapped order, so the labels come out consistent in the end.

    :param dataframes: melted dataframes with columns (year, variable, value)
    :returns: one dataframe with columns ["geo", "year", 0, 1, ...]
    """
    for position, frame in enumerate(dataframes):
        frame.columns = ["geo", "year", position]

    def _join(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:
        # Inner merge on the two relabelled key columns.
        return pd.merge(left, right, left_on=["year", "geo"],
                        right_on=["year", "geo"])

    return reduce(_join, dataframes)

def fill_holes(df: pd.DataFrame) -> pd.DataFrame:
    """
    Impute missing observations (NaN or 0) with a per-country linear trend.

    For each 'geo' group and each non-geo column, fits an OLS line over
    the row positions where data is present and fills the missing rows
    with the fitted values. Columns that are entirely present or entirely
    missing within a group are left unchanged.

    NOTE(review): assumes 'geo' is the FIRST column — the rebuilt frame
    puts the group name first and then reapplies df.columns. Verify
    against callers.

    :param df: dataframe with a 'geo' column and numeric feature columns
    :returns: a new dataframe with the same columns and the holes filled
    """
    # OLS via the normal equations: w = (X^T X)^-1 X^T y
    lin_reg = lambda X, Y: np.matmul(np.linalg.inv(np.matmul(X.T, X)), np.matmul(X.T, Y))

    dfs = []

    for name, group in df.groupby('geo'):
        # First rebuilt column: the group name repeated per row.
        cols = [[name for _ in range(len(group.index))]]
        for i in range(1, len(group.columns)):
            # Column i as an (n, 1) array.
            d = group.iloc[:, i:i+1].to_numpy()

            # Treat NaN and exact 0 as "missing".
            missing_mask = np.isnan(d) | (d == 0)
            present_mask = ~missing_mask

            # Flatten the (n, 1) masks to 1-D for boolean indexing.
            missing_mask = missing_mask.reshape(1, -1)[0]
            present_mask = present_mask.reshape(1, -1)[0]

            # Nothing missing: keep the column as-is.
            if not np.any(missing_mask):
                d = d.reshape(1, -1)[0]
                cols.append(d)
                continue

            # Nothing present: no trend to fit, keep the column as-is.
            if not np.any(present_mask):
                d = d.reshape(1, -1)[0]
                cols.append(d)
                continue

            # Fit value ~ row-position on the present rows
            # (np.pad prepends the intercept column of 1s).
            x_present = np.pad(np.arange(len(d))[present_mask].reshape(-1, 1), ((0, 0), (1, 0)), mode="constant", constant_values=1)
            y_present = d[present_mask]

            w = lin_reg(x_present, y_present)

            # Predict the fitted line at the missing row positions.
            x_missing = np.pad(np.arange(len(d))[missing_mask].reshape(-1, 1), ((0, 0), (1, 0)), mode="constant", constant_values=1)
            y_missing_pred = np.matmul(x_missing, w)

            # Write the imputed values back and flatten to 1-D.
            d[missing_mask] = y_missing_pred
            d = d.reshape(1, -1)[0]

            cols.append(d)

        # cols is column-major; transpose back to rows-per-observation.
        dfs.append(pd.DataFrame(cols).T)

    # Stack the per-country frames and restore the original column labels.
    df_unswissed = pd.concat(dfs, axis=0)
    df_unswissed.columns = df.columns
    return df_unswissed

def standardize(df: pd.DataFrame) -> pd.DataFrame:
    """
    Z-score every non-geo column of *df*, leaving 'geo' untouched.

    Each feature column is transformed to (x - mean) / std (pandas sample
    std, ddof=1); 'geo' is copied over unchanged as the last column.

    :param df: dataframe with a 'geo' column and numeric feature columns
    :returns: a new dataframe with standardized features plus 'geo'
    """
    result = pd.DataFrame()
    for col in df.columns:
        # Skip the categorical country column.
        if col == "geo":
            continue
        centered = df[col] - df[col].mean()
        result[f'{col}'] = centered / df[col].std()
    result["geo"] = df["geo"]

    return result
Loading

0 comments on commit a13089b

Please sign in to comment.