Skip to content

Commit

Permalink
Updates
Browse files Browse the repository at this point in the history
  • Loading branch information
cboulanger committed Oct 13, 2023
1 parent ce6e221 commit 73d3589
Show file tree
Hide file tree
Showing 9 changed files with 520 additions and 468 deletions.
Binary file modified docs/jls-journal-network-communities-graph-openalex.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/jls-journal-network-communities-graph-wos.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
539 changes: 301 additions & 238 deletions docs/jls-journal-network-communities-table-openalex.html

Large diffs are not rendered by default.

326 changes: 124 additions & 202 deletions docs/jls-journal-network-communities-table-wos.html

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions docs/jls-journal-network-openalex.html

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions docs/jls-journal-network-wos.html

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions jls-article-figures.ipynb

Large diffs are not rendered by default.

64 changes: 64 additions & 0 deletions jls-network-over-time.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
library(network)
library(networkDynamic)
library(tidyverse)
library(ndtv)

# Build a year-by-year animation of the journal citation network.
#
# Input: data/jls-journal-network-openalex.csv, read with the columns
# source_title_citing, source_title_cited, citation_year and count_citations.
# Every remaining row becomes one directed edge that is active only during
# its own citation year; all journals stay visible for the whole period.

# Read the data.
# - Rows without a journal title cannot be mapped to a vertex.
# - Self-citations are dropped because the network is declared loops = FALSE.
data <- read.csv("data/jls-journal-network-openalex.csv") |>
  filter(
    citation_year >= 1974,
    !is.na(source_title_citing),
    !is.na(source_title_cited),
    source_title_citing != source_title_cited
  )
stopifnot("No citation rows left after filtering" = nrow(data) > 0)

# Vertex ids are the positions of the journal titles in `titles`
titles <- unique(c(data$source_title_citing, data$source_title_cited))
data <- data |>
  mutate(
    source_id = match(source_title_citing, titles),
    target_id = match(source_title_cited, titles)
  )

# Create the network from an edge list with one edge per data row.
# - matrix.type is given explicitly so that a two-row edge list is not
#   mistaken for a 2x2 adjacency matrix.
# - multiple = TRUE because the same pair of journals occurs once per year.
net <- network(
  cbind(data$source_id, data$target_id),
  matrix.type = "edgelist",
  directed = TRUE,
  loops = FALSE,
  multiple = TRUE
)
stopifnot(
  network.size(net) == length(titles),
  network.edgecount(net) == nrow(data)
)
network.vertex.names(net) <- titles

# Edges were added in row order, so the edge id of a row is its row index.
# This replaces a per-row edge lookup, which is ambiguous as soon as a pair
# of journals appears in more than one year.
data$edge_id_network <- seq_len(nrow(data))

# Create networkDynamic object
dynNet <- networkDynamic(net)

first_year <- min(data$citation_year, na.rm = TRUE)
last_year <- max(data$citation_year, na.rm = TRUE)

# Activity spells are right-open intervals [onset, terminus), so a citation
# year y is represented as [y, y + 1).
# NOTE: the network / networkDynamic setters below are called without
# assignment, as in the packages' own examples (see the activate.edges help).
activate.edges(
  dynNet,
  onset = data$citation_year,
  terminus = data$citation_year + 1,
  e = data$edge_id_network
)

# Keep every journal active for the whole period. The terminus is
# last_year + 1 so that the vertices are still active in the final year.
activate.vertices(
  dynNet,
  onset = first_year,
  terminus = last_year + 1,
  v = seq_len(network.size(net))
)

# Add count_citations as an edge attribute (edge i corresponds to row i)
set.edge.attribute(dynNet, "count_citations", data$count_citations)

# Render the movie with exactly one frame per citation year
render.d3movie(
  dynNet,
  slice.par = list(
    start = first_year,
    end = last_year,
    interval = 1,
    aggregate.dur = 1,
    rule = "latest"
  ),
  launchBrowser = TRUE
)


37 changes: 20 additions & 17 deletions jls-network.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# see also https://www.r-bloggers.com/2019/06/interactive-network-visualization-with-r/
# todo: statistical validation of results: https://cran.r-project.org/web/packages/robin/vignettes/robin.html

library(igraph)
library(tidyr)
Expand All @@ -17,25 +18,27 @@ library(dplyr)
# configuration
min_year <- 0
min_all_years <- 5
ignore_journals <- c('Sustainability',
'Choice Reviews Online',
'RePEc: Research Papers in Economics',
"DOAJ (DOAJ: Directory of Open Access Journals)")
louvain_cluster_resolution <- 1
ignore_journals <- c('sustainability',
'choice reviews online',
'repec: research papers in economics',
"doaj (doaj: directory of open access journals)")
# data source
journal_id <- "jls"
data_vendor <- "openalex"
data_vendor <- "wos"
data_file <- paste0("data/", journal_id, "-journal-network-", data_vendor, ".csv")

# dataframe with columns source_title1, source_title2, count_citations
df <- read.csv(data_file)

# remove self-citations
df <- df |>
filter(!(source_title_citing %in% ignore_journals) & !(source_title_cited %in% ignore_journals)) |>
df <- read.csv(data_file) |>
# remove journals that have a high citation rate but are not relevant for our question
filter(!(str_to_lower(source_title_citing) %in% ignore_journals) &
!(str_to_lower(source_title_cited) %in% ignore_journals)) |>
# remove self-citations
filter(source_title_citing != source_title_cited) |>
# remove journals which do not meet a minimum of citations per year
filter(count_citations >= min_year)

# Calculate total citations made by each journal per year and normalized weight
# Calculate total citations made by each journal per year and compute normalized weight
citing_totals_per_year <- df |>
group_by(source_title_citing, citation_year) |>
summarise(total_citing = sum(count_citations))
Expand All @@ -45,8 +48,7 @@ df <- df |>

# Create list of nodes with id and label
all_nodes <- data.frame(label = unique(c(df$source_title_citing, df$source_title_cited))) |>
mutate(id=seq_along(label)) |>
select(id, label)
mutate(id=seq_along(label))

# Create list of edges
edges <- df |>
Expand Down Expand Up @@ -74,16 +76,17 @@ edges <- edges |>
from = first(from),
to = first(to),
total_count = sum(count),
combined_median_weight = median(c(rep(median_weight, count))),
combined_weight = median(weight, na.rm = TRUE),
label_AB = sum(if(from < to) count else 0), # A->B
label_BA = sum(if(from > to) count else 0) # B->A
) |>
# Create the desired label and weight
mutate(
label = paste(label_AB, "/", label_BA),
weight = combined_median_weight
weight = combined_weight,
size = combined_weight * 10
) |>
select(from, to, label, weight)
select(from, to, label, weight, size)

# Filter out nodes without edges
edges_node_ids <- unique(c(edges$from, edges$to))
Expand All @@ -99,7 +102,7 @@ largest_comp_id <- which.max(comps$csize)
graph <- induced_subgraph(graph, which(comps$membership == largest_comp_id))

# Louvain Community Detection
cluster <- cluster_louvain(graph)
cluster <- cluster_louvain(graph, resolution = louvain_cluster_resolution)
cluster_groups <- membership(cluster)
cluster_df <- tibble(group=cluster_groups) |>
mutate(id = as.integer(row_number())) |>
Expand Down

0 comments on commit 73d3589

Please sign in to comment.