forked from dataprofessor/youtube-data-app
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfootball_app.py
73 lines (62 loc) · 2.78 KB
/
football_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import pandas as pd
import base64
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
st.title('NFL Football Stats (Rushing) Explorer')
st.markdown("""
This app performs simple webscraping of NFL Football player stats data (focusing on Rushing)!
* **Python libraries:** base64, pandas, streamlit, numpy, matplotlib, seaborn
* **Data source:** [pro-football-reference.com](https://www.pro-football-reference.com/).
""")
st.sidebar.header('User Input Features')
selected_year = st.sidebar.selectbox('Year', list(reversed(range(1990,2020))))
# Web scraping of NFL player stats
# https://www.pro-football-reference.com/years/2019/rushing.htm
@st.cache_data
def load_data(year):
url = "https://www.pro-football-reference.com/years/" + str(year) + "/rushing.htm"
html = pd.read_html(url, header = 1)
df = html[0]
raw = df.drop(df[df.Age == 'Age'].index) # Deletes repeating headers in content
raw = raw.fillna(0)
# Drop rows with missing values
df = df.dropna()
playerstats = raw.drop(['Rk'], axis=1)
return playerstats
playerstats = load_data(selected_year)
# Sidebar - Team selection
sorted_unique_team = sorted(playerstats.Tm.unique())
selected_team = st.sidebar.multiselect('Team', sorted_unique_team, sorted_unique_team)
# Sidebar - Position selection
unique_pos = ['RB','QB','WR','FB','TE']
selected_pos = st.sidebar.multiselect('Position', unique_pos, unique_pos)
# Filtering data
df_selected_team = playerstats[(playerstats.Tm.isin(selected_team)) & (playerstats.Pos.isin(selected_pos))]
st.header('Display Player Stats of Selected Team(s)')
st.write('Data Dimension: ' + str(df_selected_team.shape[0]) + ' rows and ' + str(df_selected_team.shape[1]) + ' columns.')
st.dataframe(df_selected_team)
# Download NBA player stats data
# https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806
def filedownload(df):
csv = df.to_csv(index=False)
b64 = base64.b64encode(csv.encode()).decode() # strings <-> bytes conversions
href = f'<a href="data:file/csv;base64,{b64}" download="playerstats.csv">Download CSV File</a>'
return href
st.markdown(filedownload(df_selected_team), unsafe_allow_html=True)
# Heatmap
if st.button('Intercorrelation Heatmap'):
st.header('Intercorrelation Matrix Heatmap')
df_selected_team.to_csv('output.csv',index=False)
df = pd.read_csv('output.csv', encoding='utf-8')
# Exclude non-numeric columns before computing correlation
numeric_columns = df.select_dtypes(include=[np.number]).columns
corr = df[numeric_columns].corr()
#corr = df.corr()
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True
with sns.axes_style("white"):
f, ax = plt.subplots(figsize=(7, 5))
ax = sns.heatmap(corr, mask=mask, vmax=1, square=True)
st.pyplot(f)