# jsonfile.R
install.packages("jsonlite") # do the converting
install.packages("tidyverse") # clean up data
library(jsonlite)
library(dplyr)
# Load the structures file. The rest of the script uses the result as a data
# frame with `id` and `structure_id_path` columns.
df <- fromJSON(txt = "[PATH_TO_JSON_FILE]/structures.json")
#' Extract the numeric IDs from one `structure_id_path` entry.
#'
#' @param path One entry of the `structure_id_path` column: a numeric vector
#'   of IDs (bare, or wrapped in a list as `df$col[i]` yields for a list
#'   column), or its text form such as `"c(997, 8, 567)"` or `"997"`.
#' @return A numeric vector holding the IDs of `path` in their original order.
extract_ids <- function(path) {
  flat <- unlist(path)
  # Numeric data is used as-is. Going through its text form is lossy: R
  # renders a run of consecutive integers such as c(1L, 2L, 3L) as "1:3",
  # which cannot be parsed back and would become NA.
  if (is.numeric(flat)) {
    return(as.numeric(flat))
  }
  # Text form: drop the "c(" / ")" wrapper, split on commas, convert.
  # as.numeric() tolerates the blank left after each comma.
  stripped <- gsub("c\\(|\\)", "", flat)
  as.numeric(unlist(strsplit(stripped, ",")))
}
# Build the id / parent / child relationship rows for every structure.
# Each row of `df` contributes:
#   * one row linking the structure to its direct parent (NA when the path
#     holds a single id), with `child` set to the structure itself, and
#   * one row per consecutive (parent, child) pair along its path.
# seq_len() keeps the iteration empty when `df` has no rows (1:nrow(df) would
# iterate over c(1, 0)).
relationship_rows <- lapply(seq_len(nrow(df)), function(i) {
  # Extract the IDs from the structure_id_path
  ids <- extract_ids(df$structure_id_path[i])
  n_ids <- length(ids)
  # The last id on the path is the structure itself; the one before it is
  # its direct parent.
  current_id <- ids[n_ids]
  parent_id <- if (n_ids > 1) ids[n_ids - 1] else NA_real_
  rows <- data.frame(id = current_id, parent = parent_id, child = current_id)
  if (n_ids > 1) {
    path_edges <- data.frame(
      id = current_id,
      parent = ids[-n_ids],
      child = ids[-1]
    )
    rows <- rbind(rows, path_edges)
  }
  rows
})
# Bind everything once instead of growing the data frame inside the loop.
# The empty template keeps the three numeric columns even when there are no
# rows to bind.
relationships <- do.call(
  rbind,
  c(
    list(data.frame(id = numeric(), parent = numeric(), child = numeric())),
    relationship_rows
  )
)
# Remove duplicate relationships
relationships <- unique(relationships)
# Summarise the relationships into one row per structure id:
#   parent_structure_id - the structure's direct parent (NA for a root)
#   children            - comma-separated ids of its direct children,
#                         "" when it has none
# Every row with a non-missing `parent` is a parent -> child edge, so the
# children of a structure are the `child` values of the rows whose `parent`
# is that structure. Filtering a structure's own rows by `child != id` would
# list the ids along its path (its ancestors), not its children.
children_by_parent <- relationships %>%
  filter(!is.na(parent)) %>%
  distinct(parent, child) %>%
  group_by(parent) %>%
  summarize(children = paste(child, collapse = ", "))
tree_jsn <- relationships %>%
  group_by(id) %>%
  # The first row recorded for an id is its own "direct parent" row.
  summarize(parent_structure_id = first(parent)) %>%
  left_join(children_by_parent, by = c("id" = "parent")) %>%
  # Structures that never appear as a parent have no children.
  mutate(children = coalesce(children, ""))
# Attach the computed parent / children columns to the ids of the original
# data frame (one output row per row of `df`), keeping only 'id',
# 'parent_structure_id' and 'children'.
final_treejsn <- select(
  left_join(
    select(df, id, structure_id_path),
    tree_jsn,
    by = "id"
  ),
  id, parent_structure_id, children
)
# Expose the result as a plain data frame.
tree_json <- as.data.frame(final_treejsn)