-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset_json_generate.py
58 lines (41 loc) · 1.24 KB
/
dataset_json_generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pandas as pd
import json
# Script configuration: where the rows come from, which column holds the
# free-text input, and where the generated dataset is written.
INPUT_WORKBOOK = 'road accident dataset.xlsx'
INPUT_SHEET = "Merged"
TEXT_COLUMN = "article_content"
OUTPUT_PATH = "dataset.json"


def _to_json_value(value):
    """Return *value* in a form the ``json`` module can serialise.

    Missing cells (NaN, NaT, ``pd.NA``, ``None``) become ``None`` so they are
    written as JSON ``null`` rather than the non-standard ``NaN`` token.
    Date/time-like objects are rendered through their ``isoformat()`` method,
    and NumPy scalars are unwrapped to plain Python numbers via ``item()``.
    Anything else is returned unchanged.
    """
    # is_scalar guards pd.isna, which returns an array for list-like input.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    if hasattr(value, "isoformat"):
        return value.isoformat()
    if hasattr(value, "item"):
        return value.item()
    return value


def build_dataset(df, text_column=TEXT_COLUMN, verbose=False):
    """Turn every row of *df* into a ``{"text_input", "output"}`` record.

    Args:
        df: DataFrame whose rows are converted one by one.
        text_column: Name of the column copied into ``"text_input"``. A
            missing value in this column yields an empty string.
        verbose: When true, print every visited cell (row label, column
            name, raw value).

    Returns:
        ``{"data": [...]}`` with one record per row. ``"output"`` is a JSON
        object string holding all remaining columns of that row.
    """
    records = []
    for index, row in df.iterrows():
        text_input = ""
        labels = {}
        for column_name, cell_value in row.items():
            if column_name == text_column:
                cleaned = _to_json_value(cell_value)
                text_input = "" if cleaned is None else cleaned
            else:
                labels[str(column_name)] = _to_json_value(cell_value)
            if verbose:
                print(f'Row: {index}, Column: {column_name}, Value: {cell_value}')
        records.append({
            "text_input": text_input,
            # json.dumps (not str) so the string is valid JSON: double
            # quotes, null for missing cells, readable non-ASCII text.
            "output": json.dumps(labels, ensure_ascii=False),
        })
    return {"data": records}


def main():
    """Read the configured sheet, build the dataset, and write it to disk."""
    df = pd.read_excel(INPUT_WORKBOOK, sheet_name=INPUT_SHEET)
    json_dataset = build_dataset(df, verbose=True)
    print(json_dataset)
    # Explicit UTF-8 because ensure_ascii=False writes non-ASCII characters
    # verbatim instead of as \uXXXX escapes.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as json_file:
        json.dump(json_dataset, json_file, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    main()