-
Notifications
You must be signed in to change notification settings - Fork 125
/
Copy pathdata_analysis (1).py
57 lines (40 loc) · 1.59 KB
/
data_analysis (1).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# -*- coding: utf-8 -*-
"""Data analysis.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1rgZ1KKaswBXKXYF_m5svCLsk6eBzLBT9
"""
# Notebook-only shell magic, kept as a comment because "!cmd" is a syntax
# error in a plain .py file. Install the dependency from a terminal instead:
#   pip install streamlit
import pandas as pd
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
def load_and_scatterplot(year):
    """Render a scatter plot of coding experience vs. compensation for a year.

    Reads ``Survey_results_sample_{year}.csv`` (path relative to the current
    working directory), keeps the rows where ``Country``, ``YearsCodePro``,
    ``ConvertedCompYearly`` and ``DevType`` are all present and both axis
    columns parse as numbers, and draws the result in the Streamlit app.
    Points are coloured by ``DevType`` and get one marker style per
    ``Country``.

    Args:
        year: Survey year; interpolated into the CSV file name and the
            plot captions.

    Returns:
        None. The chart, or an error/warning message when the file is
        missing, lacks a required column, or has no usable rows, is emitted
        through Streamlit as a side effect.
    """
    file_path = f"Survey_results_sample_{year}.csv"
    try:
        data = pd.read_csv(file_path)
    except FileNotFoundError:
        st.error(f"File for the year {year} not found!")
        return None

    cols = ['Country', 'YearsCodePro', 'ConvertedCompYearly', 'DevType']

    # Report a schema mismatch the same way as a missing file instead of
    # letting the column selection below raise a bare KeyError.
    missing = [col for col in cols if col not in data.columns]
    if missing:
        st.error(
            f"File for the year {year} is missing required column(s): "
            f"{', '.join(missing)}"
        )
        return None

    # .copy() gives an independent frame, so the column assignments below
    # never write into a view of `data`.
    filtered_data = data[cols].dropna().copy()

    # Both plotted axes must be numeric. Values that cannot be parsed become
    # NaN (errors='coerce') and those rows are dropped right after.
    numeric_cols = ['YearsCodePro', 'ConvertedCompYearly']
    for col in numeric_cols:
        filtered_data[col] = pd.to_numeric(filtered_data[col], errors='coerce')
    filtered_data = filtered_data.dropna(subset=numeric_cols)

    if filtered_data.empty:
        st.warning(f"No complete rows to plot for the year {year}.")
        return None

    # Create a scatter plot for YearsCodePro vs ConvertedCompYearly,
    # color-coded by DevType.
    st.write(f"Scatter Plot: Years of Professional Coding Experience vs Yearly Compensation for {year}")

    # Use an explicit Figure/Axes rather than pyplot's implicit global figure,
    # so the chart handed to Streamlit is exactly the one built here.
    fig, ax = plt.subplots(figsize=(14, 8))
    try:
        sns.scatterplot(
            data=filtered_data,
            x='YearsCodePro',
            y='ConvertedCompYearly',
            hue='DevType',
            style='Country',
            palette='deep',
            s=100,
            alpha=0.6,
            ax=ax,
        )
        # Anchor the legend outside the axes, to the right, so it does not
        # cover the data points.
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Developer Type')
        ax.set_title(f'YearsCodePro vs ConvertedCompYearly ({year}), colored by DevType', fontsize=16)
        ax.set_xlabel('Years of Professional Coding Experience', fontsize=14)
        ax.set_ylabel('Yearly Compensation (USD)', fontsize=14)
        st.pyplot(fig)
    finally:
        # Close the figure so repeated calls (e.g. Streamlit reruns) do not
        # accumulate open matplotlib figures.
        plt.close(fig)
    return None