-
Notifications
You must be signed in to change notification settings - Fork 0
/
bargraph.py
83 lines (60 loc) · 2.87 KB
/
bargraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Name of the column plotted along the x-axis; it is also the column used to
# decide which rows are sampled for the chart.
x_axis = "Distance"

# First dataset: keep only the x-axis column and the probability column,
# ordered by ascending x value.
df1 = (
    pd.read_csv("Generated Files/DR_USA_Roundabout_EP/Predicted Results - P(Y) and Y.csv")
    .loc[:, [x_axis, 'p(y)']]
    .sort_values(x_axis)
)

# Interquartile range of the x values (25th to 75th percentile), i.e. the band
# that holds the middle half of the rows.
x_values = df1[x_axis]
majority_range = (x_values.quantile(0.25), x_values.quantile(0.75))

# Number of bars to draw, and the spacing between consecutive target x values
# so that the first target is the lower quartile and the last is the upper one.
num_rows = 10
range_start, range_end = majority_range
distance_gap = (range_end - range_start) / (num_rows - 1)

# For every evenly spaced target, take the single row whose x value has the
# smallest absolute difference from that target.
sample_targets = [majority_range[0] + step * distance_gap for step in range(num_rows)]
nearest_rows = [
    df1.iloc[(df1[x_axis] - target).abs().argsort()[:1]]
    for target in sample_targets
]

# Stack the one-row frames and renumber them 0..num_rows-1; that fresh index
# doubles as the bar positions.
selected_df1 = pd.concat(nearest_rows).reset_index(drop=True)

# First bar series.
plt.bar(
    selected_df1.index,
    selected_df1['p(y)'],
    width=0.25,
    align='center',
    label='After',
)
# Load the second dataset, keeping only the x-axis column and the probability
# column, ordered by ascending x value.
df2 = pd.read_csv("Generated Files/DR_USA_Roundabout_EP/Predicted Results - P(Y) and Y2.csv")
df2 = df2[[x_axis, 'p(y)']]
df2 = df2.sort_values(x_axis)

# Pair every bar of the first dataset with exactly one row of the second one.
# Comparing floating-point x values with `==` yields zero rows when the second
# file lacks that precise value and several rows when it repeats, which leaves
# selected_df2 with a different length than x_coordinates, so the heights can
# no longer be aligned with the bar positions. Taking the row with the smallest
# absolute difference still returns the exact match whenever one exists and
# always produces one row per bar.
selected_rows2 = []
for target_distance in selected_df1[x_axis]:
    nearest_label = (df2[x_axis] - target_distance).abs().idxmin()
    # Double brackets keep the result a one-row DataFrame for pd.concat.
    selected_rows2.append(df2.loc[[nearest_label]])

# Stack the matched rows and renumber them so they line up with selected_df1.
selected_df2 = pd.concat(selected_rows2)
selected_df2 = selected_df2.reset_index(drop=True)

# Shift the second series to the right so its bars sit beside the first series
# instead of on top of it.
x_coordinates = selected_df1.index + 0.4

# Second bar series.
plt.bar(x_coordinates, selected_df2['p(y)'], width=0.25, align='center', label='Before', alpha=0.5, color="orange")
# One tick per bar of the first series, labelled with the evenly spaced target
# x value for that bar, rounded to two decimals.
tick_labels = [
    str(round(majority_range[0] + step * distance_gap, 2))
    for step in range(num_rows)
]
plt.xticks(selected_df1.index, tick_labels, fontsize=5)

# Set the default font size in matplotlib's runtime configuration.
plt.rcParams['font.size'] = 8

# Axis limits and captions. The y-limit of 1.8 leaves headroom above the bars
# for the vertical value labels and the title text.
# (Optional, disabled in the original: plt.yticks([]) to hide the y ticks.)
plt.ylim((0, 1.8))
plt.xlabel(x_axis)
plt.ylabel("P (Y)")

# Title drawn as free text inside the axes at data coordinates (5, 1.6).
plt.text(5, 1.6, 'My Plot Title', fontsize=8, ha='center')

# Write each bar's rounded height just above it: first series at its own
# position, second series at the same 0.4 offset used for its bars.
for x_shift, heights in ((0, selected_df1['p(y)']), (0.4, selected_df2['p(y)'])):
    for bar_number, height in enumerate(heights):
        plt.text(
            bar_number + x_shift,
            height + 0.1,
            str(round(height, 2)),
            ha='center',
            rotation='vertical',
            fontsize=8,
        )

# Legend built from the `label` of each bar series, then display the figure.
plt.legend()
plt.show()